rbelanec commited on
Commit
dd5e466
·
verified ·
1 Parent(s): d258975

Training in progress, step 3400

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b26b02777a8b7f6d531812689546f4e0292661d634d1062cd4dd7b4ad45ebba7
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077f6172d297e9484da04d81f08213a2515cd9081914f96995243bba8161f701
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -654,3 +654,44 @@
654
  {"current_steps": 3195, "total_steps": 40000, "loss": 6.1542, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:00:32", "remaining_time": "1 day, 10:39:43", "throughput": 399.21, "total_tokens": 4324368}
655
  {"current_steps": 3200, "total_steps": 40000, "loss": 6.6138, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:00:34", "remaining_time": "1 day, 10:36:35", "throughput": 399.76, "total_tokens": 4331088}
656
  {"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.739660263061523, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:11:08", "remaining_time": "1 day, 12:38:06", "throughput": 377.65, "total_tokens": 4331088}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  {"current_steps": 3195, "total_steps": 40000, "loss": 6.1542, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:00:32", "remaining_time": "1 day, 10:39:43", "throughput": 399.21, "total_tokens": 4324368}
655
  {"current_steps": 3200, "total_steps": 40000, "loss": 6.6138, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:00:34", "remaining_time": "1 day, 10:36:35", "throughput": 399.76, "total_tokens": 4331088}
656
  {"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.739660263061523, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:11:08", "remaining_time": "1 day, 12:38:06", "throughput": 377.65, "total_tokens": 4331088}
657
+ {"current_steps": 3205, "total_steps": 40000, "loss": 6.3304, "lr": 0.29527574776499993, "epoch": 0.10258626208309327, "percentage": 8.01, "elapsed_time": "3:11:11", "remaining_time": "1 day, 12:34:58", "throughput": 378.14, "total_tokens": 4337776}
658
+ {"current_steps": 3210, "total_steps": 40000, "loss": 5.5037, "lr": 0.2952610695967558, "epoch": 0.10274630305358172, "percentage": 8.03, "elapsed_time": "3:11:13", "remaining_time": "1 day, 12:31:38", "throughput": 378.66, "total_tokens": 4344624}
659
+ {"current_steps": 3215, "total_steps": 40000, "loss": 6.3196, "lr": 0.29524636902742935, "epoch": 0.10290634402407016, "percentage": 8.04, "elapsed_time": "3:11:15", "remaining_time": "1 day, 12:28:20", "throughput": 379.21, "total_tokens": 4351664}
660
+ {"current_steps": 3220, "total_steps": 40000, "loss": 6.1192, "lr": 0.2952316460592875, "epoch": 0.10306638499455861, "percentage": 8.05, "elapsed_time": "3:11:17", "remaining_time": "1 day, 12:25:02", "throughput": 379.74, "total_tokens": 4358576}
661
+ {"current_steps": 3225, "total_steps": 40000, "loss": 5.7225, "lr": 0.29521690069460066, "epoch": 0.10322642596504705, "percentage": 8.06, "elapsed_time": "3:11:19", "remaining_time": "1 day, 12:21:44", "throughput": 380.25, "total_tokens": 4365216}
662
+ {"current_steps": 3230, "total_steps": 40000, "loss": 5.8469, "lr": 0.29520213293564285, "epoch": 0.10338646693553549, "percentage": 8.08, "elapsed_time": "3:11:21", "remaining_time": "1 day, 12:18:27", "throughput": 380.77, "total_tokens": 4371936}
663
+ {"current_steps": 3235, "total_steps": 40000, "loss": 5.1085, "lr": 0.29518734278469144, "epoch": 0.10354650790602395, "percentage": 8.09, "elapsed_time": "3:11:23", "remaining_time": "1 day, 12:15:10", "throughput": 381.29, "total_tokens": 4378640}
664
+ {"current_steps": 3240, "total_steps": 40000, "loss": 4.9587, "lr": 0.29517253024402723, "epoch": 0.10370654887651239, "percentage": 8.1, "elapsed_time": "3:11:25", "remaining_time": "1 day, 12:11:54", "throughput": 381.8, "total_tokens": 4385248}
665
+ {"current_steps": 3245, "total_steps": 40000, "loss": 5.3926, "lr": 0.2951576953159345, "epoch": 0.10386658984700083, "percentage": 8.11, "elapsed_time": "3:11:27", "remaining_time": "1 day, 12:08:38", "throughput": 382.32, "total_tokens": 4392080}
666
+ {"current_steps": 3250, "total_steps": 40000, "loss": 5.626, "lr": 0.29514283800270097, "epoch": 0.10402663081748928, "percentage": 8.12, "elapsed_time": "3:11:29", "remaining_time": "1 day, 12:05:24", "throughput": 382.84, "total_tokens": 4398784}
667
+ {"current_steps": 3255, "total_steps": 40000, "loss": 5.6925, "lr": 0.2951279583066179, "epoch": 0.10418667178797772, "percentage": 8.14, "elapsed_time": "3:11:31", "remaining_time": "1 day, 12:02:09", "throughput": 383.35, "total_tokens": 4405376}
668
+ {"current_steps": 3260, "total_steps": 40000, "loss": 4.9893, "lr": 0.2951130562299798, "epoch": 0.10434671275846617, "percentage": 8.15, "elapsed_time": "3:11:33", "remaining_time": "1 day, 11:58:56", "throughput": 383.86, "total_tokens": 4412112}
669
+ {"current_steps": 3265, "total_steps": 40000, "loss": 6.1722, "lr": 0.29509813177508487, "epoch": 0.10450675372895461, "percentage": 8.16, "elapsed_time": "3:11:36", "remaining_time": "1 day, 11:55:43", "throughput": 384.38, "total_tokens": 4418816}
670
+ {"current_steps": 3270, "total_steps": 40000, "loss": 5.5052, "lr": 0.2950831849442346, "epoch": 0.10466679469944305, "percentage": 8.18, "elapsed_time": "3:11:38", "remaining_time": "1 day, 11:52:30", "throughput": 384.89, "total_tokens": 4425472}
671
+ {"current_steps": 3275, "total_steps": 40000, "loss": 5.7296, "lr": 0.2950682157397339, "epoch": 0.1048268356699315, "percentage": 8.19, "elapsed_time": "3:11:40", "remaining_time": "1 day, 11:49:18", "throughput": 385.4, "total_tokens": 4432176}
672
+ {"current_steps": 3280, "total_steps": 40000, "loss": 6.2374, "lr": 0.2950532241638914, "epoch": 0.10498687664041995, "percentage": 8.2, "elapsed_time": "3:11:42", "remaining_time": "1 day, 11:46:07", "throughput": 385.92, "total_tokens": 4438928}
673
+ {"current_steps": 3285, "total_steps": 40000, "loss": 6.0145, "lr": 0.2950382102190188, "epoch": 0.10514691761090839, "percentage": 8.21, "elapsed_time": "3:11:44", "remaining_time": "1 day, 11:42:56", "throughput": 386.4, "total_tokens": 4445216}
674
+ {"current_steps": 3290, "total_steps": 40000, "loss": 5.4508, "lr": 0.2950231739074316, "epoch": 0.10530695858139684, "percentage": 8.22, "elapsed_time": "3:11:46", "remaining_time": "1 day, 11:39:45", "throughput": 386.9, "total_tokens": 4451760}
675
+ {"current_steps": 3295, "total_steps": 40000, "loss": 5.5077, "lr": 0.29500811523144843, "epoch": 0.10546699955188528, "percentage": 8.24, "elapsed_time": "3:11:48", "remaining_time": "1 day, 11:36:36", "throughput": 387.44, "total_tokens": 4458752}
676
+ {"current_steps": 3300, "total_steps": 40000, "loss": 6.0674, "lr": 0.2949930341933917, "epoch": 0.10562704052237373, "percentage": 8.25, "elapsed_time": "3:11:50", "remaining_time": "1 day, 11:33:27", "throughput": 387.97, "total_tokens": 4465632}
677
+ {"current_steps": 3305, "total_steps": 40000, "loss": 6.0529, "lr": 0.29497793079558693, "epoch": 0.10578708149286217, "percentage": 8.26, "elapsed_time": "3:11:52", "remaining_time": "1 day, 11:30:18", "throughput": 388.48, "total_tokens": 4472224}
678
+ {"current_steps": 3310, "total_steps": 40000, "loss": 5.5106, "lr": 0.2949628050403633, "epoch": 0.10594712246335061, "percentage": 8.28, "elapsed_time": "3:11:54", "remaining_time": "1 day, 11:27:10", "throughput": 388.97, "total_tokens": 4478624}
679
+ {"current_steps": 3315, "total_steps": 40000, "loss": 5.0853, "lr": 0.2949476569300535, "epoch": 0.10610716343383907, "percentage": 8.29, "elapsed_time": "3:11:56", "remaining_time": "1 day, 11:24:03", "throughput": 389.49, "total_tokens": 4485488}
680
+ {"current_steps": 3320, "total_steps": 40000, "loss": 6.4041, "lr": 0.29493248646699344, "epoch": 0.1062672044043275, "percentage": 8.3, "elapsed_time": "3:11:58", "remaining_time": "1 day, 11:20:57", "throughput": 390.05, "total_tokens": 4492736}
681
+ {"current_steps": 3325, "total_steps": 40000, "loss": 5.7114, "lr": 0.29491729365352265, "epoch": 0.10642724537481596, "percentage": 8.31, "elapsed_time": "3:12:00", "remaining_time": "1 day, 11:17:51", "throughput": 390.57, "total_tokens": 4499504}
682
+ {"current_steps": 3330, "total_steps": 40000, "loss": 5.8162, "lr": 0.29490207849198397, "epoch": 0.1065872863453044, "percentage": 8.33, "elapsed_time": "3:12:02", "remaining_time": "1 day, 11:14:45", "throughput": 391.08, "total_tokens": 4506192}
683
+ {"current_steps": 3335, "total_steps": 40000, "loss": 5.4379, "lr": 0.29488684098472384, "epoch": 0.10674732731579284, "percentage": 8.34, "elapsed_time": "3:12:04", "remaining_time": "1 day, 11:11:40", "throughput": 391.59, "total_tokens": 4512880}
684
+ {"current_steps": 3340, "total_steps": 40000, "loss": 5.2765, "lr": 0.2948715811340921, "epoch": 0.10690736828628129, "percentage": 8.35, "elapsed_time": "3:12:06", "remaining_time": "1 day, 11:08:36", "throughput": 392.12, "total_tokens": 4519824}
685
+ {"current_steps": 3345, "total_steps": 40000, "loss": 6.32, "lr": 0.294856298942442, "epoch": 0.10706740925676973, "percentage": 8.36, "elapsed_time": "3:12:08", "remaining_time": "1 day, 11:05:32", "throughput": 392.66, "total_tokens": 4526880}
686
+ {"current_steps": 3350, "total_steps": 40000, "loss": 5.5361, "lr": 0.2948409944121302, "epoch": 0.10722745022725817, "percentage": 8.38, "elapsed_time": "3:12:10", "remaining_time": "1 day, 11:02:30", "throughput": 393.23, "total_tokens": 4534304}
687
+ {"current_steps": 3355, "total_steps": 40000, "loss": 5.4795, "lr": 0.29482566754551687, "epoch": 0.10738749119774663, "percentage": 8.39, "elapsed_time": "3:12:12", "remaining_time": "1 day, 10:59:27", "throughput": 393.74, "total_tokens": 4540880}
688
+ {"current_steps": 3360, "total_steps": 40000, "loss": 5.937, "lr": 0.2948103183449656, "epoch": 0.10754753216823507, "percentage": 8.4, "elapsed_time": "3:12:14", "remaining_time": "1 day, 10:56:24", "throughput": 394.24, "total_tokens": 4547520}
689
+ {"current_steps": 3365, "total_steps": 40000, "loss": 6.1634, "lr": 0.2947949468128435, "epoch": 0.10770757313872352, "percentage": 8.41, "elapsed_time": "3:12:16", "remaining_time": "1 day, 10:53:22", "throughput": 394.76, "total_tokens": 4554240}
690
+ {"current_steps": 3370, "total_steps": 40000, "loss": 6.0973, "lr": 0.2947795529515209, "epoch": 0.10786761410921196, "percentage": 8.43, "elapsed_time": "3:12:18", "remaining_time": "1 day, 10:50:21", "throughput": 395.27, "total_tokens": 4561040}
691
+ {"current_steps": 3375, "total_steps": 40000, "loss": 5.5619, "lr": 0.29476413676337193, "epoch": 0.1080276550797004, "percentage": 8.44, "elapsed_time": "3:12:20", "remaining_time": "1 day, 10:47:21", "throughput": 395.8, "total_tokens": 4567920}
692
+ {"current_steps": 3380, "total_steps": 40000, "loss": 5.2372, "lr": 0.2947486982507738, "epoch": 0.10818769605018885, "percentage": 8.45, "elapsed_time": "3:12:23", "remaining_time": "1 day, 10:44:20", "throughput": 396.31, "total_tokens": 4574592}
693
+ {"current_steps": 3385, "total_steps": 40000, "loss": 6.1078, "lr": 0.29473323741610735, "epoch": 0.10834773702067729, "percentage": 8.46, "elapsed_time": "3:12:25", "remaining_time": "1 day, 10:41:20", "throughput": 396.81, "total_tokens": 4581184}
694
+ {"current_steps": 3390, "total_steps": 40000, "loss": 6.0475, "lr": 0.2947177542617569, "epoch": 0.10850777799116573, "percentage": 8.48, "elapsed_time": "3:12:27", "remaining_time": "1 day, 10:38:21", "throughput": 397.33, "total_tokens": 4587968}
695
+ {"current_steps": 3395, "total_steps": 40000, "loss": 5.4173, "lr": 0.2947022487901101, "epoch": 0.10866781896165419, "percentage": 8.49, "elapsed_time": "3:12:29", "remaining_time": "1 day, 10:35:22", "throughput": 397.81, "total_tokens": 4594368}
696
+ {"current_steps": 3400, "total_steps": 40000, "loss": 6.1053, "lr": 0.2946867210035581, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:12:31", "remaining_time": "1 day, 10:32:25", "throughput": 398.38, "total_tokens": 4601728}
697
+ {"current_steps": 3400, "total_steps": 40000, "eval_loss": 5.716908931732178, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:23:05", "remaining_time": "1 day, 12:26:14", "throughput": 377.64, "total_tokens": 4601728}