rbelanec commited on
Commit
b3e639f
·
verified ·
1 Parent(s): 51d6d30

Training in progress, step 3400

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3e38596a5e8f3e9a1bdc4b90d776edf22a9385c1f460cc108163f1c8980f0c
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d19ddbccb1734cd355ddee30937370fd5559ac8e537d30816e80addf62b4d8
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -654,3 +654,44 @@
654
  {"current_steps": 3195, "total_steps": 40000, "loss": 6.0947, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:03:12", "remaining_time": "1 day, 11:10:32", "throughput": 393.38, "total_tokens": 4324368}
655
  {"current_steps": 3200, "total_steps": 40000, "loss": 6.6672, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:03:14", "remaining_time": "1 day, 11:07:20", "throughput": 393.92, "total_tokens": 4331088}
656
  {"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.711532115936279, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:13:58", "remaining_time": "1 day, 13:10:39", "throughput": 372.14, "total_tokens": 4331088}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  {"current_steps": 3195, "total_steps": 40000, "loss": 6.0947, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:03:12", "remaining_time": "1 day, 11:10:32", "throughput": 393.38, "total_tokens": 4324368}
655
  {"current_steps": 3200, "total_steps": 40000, "loss": 6.6672, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:03:14", "remaining_time": "1 day, 11:07:20", "throughput": 393.92, "total_tokens": 4331088}
656
  {"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.711532115936279, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:13:58", "remaining_time": "1 day, 13:10:39", "throughput": 372.14, "total_tokens": 4331088}
657
+ {"current_steps": 3205, "total_steps": 40000, "loss": 6.3491, "lr": 0.29527574776499993, "epoch": 0.10258626208309327, "percentage": 8.01, "elapsed_time": "3:14:01", "remaining_time": "1 day, 13:07:30", "throughput": 372.61, "total_tokens": 4337776}
658
+ {"current_steps": 3210, "total_steps": 40000, "loss": 5.5269, "lr": 0.2952610695967558, "epoch": 0.10274630305358172, "percentage": 8.03, "elapsed_time": "3:14:03", "remaining_time": "1 day, 13:04:08", "throughput": 373.13, "total_tokens": 4344624}
659
+ {"current_steps": 3215, "total_steps": 40000, "loss": 6.3063, "lr": 0.29524636902742935, "epoch": 0.10290634402407016, "percentage": 8.04, "elapsed_time": "3:14:05", "remaining_time": "1 day, 13:00:46", "throughput": 373.67, "total_tokens": 4351664}
660
+ {"current_steps": 3220, "total_steps": 40000, "loss": 6.1282, "lr": 0.2952316460592875, "epoch": 0.10306638499455861, "percentage": 8.05, "elapsed_time": "3:14:07", "remaining_time": "1 day, 12:57:25", "throughput": 374.2, "total_tokens": 4358576}
661
+ {"current_steps": 3225, "total_steps": 40000, "loss": 5.7549, "lr": 0.29521690069460066, "epoch": 0.10322642596504705, "percentage": 8.06, "elapsed_time": "3:14:09", "remaining_time": "1 day, 12:54:04", "throughput": 374.7, "total_tokens": 4365216}
662
+ {"current_steps": 3230, "total_steps": 40000, "loss": 5.9063, "lr": 0.29520213293564285, "epoch": 0.10338646693553549, "percentage": 8.08, "elapsed_time": "3:14:11", "remaining_time": "1 day, 12:50:44", "throughput": 375.21, "total_tokens": 4371936}
663
+ {"current_steps": 3235, "total_steps": 40000, "loss": 5.0874, "lr": 0.29518734278469144, "epoch": 0.10354650790602395, "percentage": 8.09, "elapsed_time": "3:14:13", "remaining_time": "1 day, 12:47:24", "throughput": 375.72, "total_tokens": 4378640}
664
+ {"current_steps": 3240, "total_steps": 40000, "loss": 4.9757, "lr": 0.29517253024402723, "epoch": 0.10370654887651239, "percentage": 8.1, "elapsed_time": "3:14:15", "remaining_time": "1 day, 12:44:05", "throughput": 376.22, "total_tokens": 4385248}
665
+ {"current_steps": 3245, "total_steps": 40000, "loss": 5.3763, "lr": 0.2951576953159345, "epoch": 0.10386658984700083, "percentage": 8.11, "elapsed_time": "3:14:18", "remaining_time": "1 day, 12:40:46", "throughput": 376.74, "total_tokens": 4392080}
666
+ {"current_steps": 3250, "total_steps": 40000, "loss": 5.6235, "lr": 0.29514283800270097, "epoch": 0.10402663081748928, "percentage": 8.12, "elapsed_time": "3:14:20", "remaining_time": "1 day, 12:37:28", "throughput": 377.25, "total_tokens": 4398784}
667
+ {"current_steps": 3255, "total_steps": 40000, "loss": 5.7336, "lr": 0.2951279583066179, "epoch": 0.10418667178797772, "percentage": 8.14, "elapsed_time": "3:14:22", "remaining_time": "1 day, 12:34:11", "throughput": 377.75, "total_tokens": 4405376}
668
+ {"current_steps": 3260, "total_steps": 40000, "loss": 4.9623, "lr": 0.2951130562299798, "epoch": 0.10434671275846617, "percentage": 8.15, "elapsed_time": "3:14:24", "remaining_time": "1 day, 12:30:54", "throughput": 378.26, "total_tokens": 4412112}
669
+ {"current_steps": 3265, "total_steps": 40000, "loss": 6.2228, "lr": 0.29509813177508487, "epoch": 0.10450675372895461, "percentage": 8.16, "elapsed_time": "3:14:26", "remaining_time": "1 day, 12:27:38", "throughput": 378.77, "total_tokens": 4418816}
670
+ {"current_steps": 3270, "total_steps": 40000, "loss": 5.5275, "lr": 0.2950831849442346, "epoch": 0.10466679469944305, "percentage": 8.18, "elapsed_time": "3:14:28", "remaining_time": "1 day, 12:24:23", "throughput": 379.27, "total_tokens": 4425472}
671
+ {"current_steps": 3275, "total_steps": 40000, "loss": 5.694, "lr": 0.2950682157397339, "epoch": 0.1048268356699315, "percentage": 8.19, "elapsed_time": "3:14:30", "remaining_time": "1 day, 12:21:08", "throughput": 379.78, "total_tokens": 4432176}
672
+ {"current_steps": 3280, "total_steps": 40000, "loss": 6.2268, "lr": 0.2950532241638914, "epoch": 0.10498687664041995, "percentage": 8.2, "elapsed_time": "3:14:32", "remaining_time": "1 day, 12:17:53", "throughput": 380.29, "total_tokens": 4438928}
673
+ {"current_steps": 3285, "total_steps": 40000, "loss": 6.0347, "lr": 0.2950382102190188, "epoch": 0.10514691761090839, "percentage": 8.21, "elapsed_time": "3:14:34", "remaining_time": "1 day, 12:14:39", "throughput": 380.77, "total_tokens": 4445216}
674
+ {"current_steps": 3290, "total_steps": 40000, "loss": 5.4231, "lr": 0.2950231739074316, "epoch": 0.10530695858139684, "percentage": 8.22, "elapsed_time": "3:14:36", "remaining_time": "1 day, 12:11:26", "throughput": 381.26, "total_tokens": 4451760}
675
+ {"current_steps": 3295, "total_steps": 40000, "loss": 5.4835, "lr": 0.29500811523144843, "epoch": 0.10546699955188528, "percentage": 8.24, "elapsed_time": "3:14:38", "remaining_time": "1 day, 12:08:14", "throughput": 381.79, "total_tokens": 4458752}
676
+ {"current_steps": 3300, "total_steps": 40000, "loss": 6.0333, "lr": 0.2949930341933917, "epoch": 0.10562704052237373, "percentage": 8.25, "elapsed_time": "3:14:40", "remaining_time": "1 day, 12:05:02", "throughput": 382.31, "total_tokens": 4465632}
677
+ {"current_steps": 3305, "total_steps": 40000, "loss": 6.0783, "lr": 0.29497793079558693, "epoch": 0.10578708149286217, "percentage": 8.26, "elapsed_time": "3:14:42", "remaining_time": "1 day, 12:01:50", "throughput": 382.81, "total_tokens": 4472224}
678
+ {"current_steps": 3310, "total_steps": 40000, "loss": 5.506, "lr": 0.2949628050403633, "epoch": 0.10594712246335061, "percentage": 8.28, "elapsed_time": "3:14:44", "remaining_time": "1 day, 11:58:38", "throughput": 383.29, "total_tokens": 4478624}
679
+ {"current_steps": 3315, "total_steps": 40000, "loss": 5.1095, "lr": 0.2949476569300535, "epoch": 0.10610716343383907, "percentage": 8.29, "elapsed_time": "3:14:46", "remaining_time": "1 day, 11:55:28", "throughput": 383.81, "total_tokens": 4485488}
680
+ {"current_steps": 3320, "total_steps": 40000, "loss": 6.4218, "lr": 0.29493248646699344, "epoch": 0.1062672044043275, "percentage": 8.3, "elapsed_time": "3:14:48", "remaining_time": "1 day, 11:52:20", "throughput": 384.36, "total_tokens": 4492736}
681
+ {"current_steps": 3325, "total_steps": 40000, "loss": 5.7727, "lr": 0.29491729365352265, "epoch": 0.10642724537481596, "percentage": 8.31, "elapsed_time": "3:14:50", "remaining_time": "1 day, 11:49:11", "throughput": 384.87, "total_tokens": 4499504}
682
+ {"current_steps": 3330, "total_steps": 40000, "loss": 5.8368, "lr": 0.29490207849198397, "epoch": 0.1065872863453044, "percentage": 8.33, "elapsed_time": "3:14:52", "remaining_time": "1 day, 11:46:02", "throughput": 385.38, "total_tokens": 4506192}
683
+ {"current_steps": 3335, "total_steps": 40000, "loss": 5.4325, "lr": 0.29488684098472384, "epoch": 0.10674732731579284, "percentage": 8.34, "elapsed_time": "3:14:54", "remaining_time": "1 day, 11:42:54", "throughput": 385.88, "total_tokens": 4512880}
684
+ {"current_steps": 3340, "total_steps": 40000, "loss": 5.2622, "lr": 0.2948715811340921, "epoch": 0.10690736828628129, "percentage": 8.35, "elapsed_time": "3:14:57", "remaining_time": "1 day, 11:39:47", "throughput": 386.41, "total_tokens": 4519824}
685
+ {"current_steps": 3345, "total_steps": 40000, "loss": 6.3227, "lr": 0.294856298942442, "epoch": 0.10706740925676973, "percentage": 8.36, "elapsed_time": "3:14:59", "remaining_time": "1 day, 11:36:41", "throughput": 386.94, "total_tokens": 4526880}
686
+ {"current_steps": 3350, "total_steps": 40000, "loss": 5.5516, "lr": 0.2948409944121302, "epoch": 0.10722745022725817, "percentage": 8.38, "elapsed_time": "3:15:01", "remaining_time": "1 day, 11:33:35", "throughput": 387.5, "total_tokens": 4534304}
687
+ {"current_steps": 3355, "total_steps": 40000, "loss": 5.4318, "lr": 0.29482566754551687, "epoch": 0.10738749119774663, "percentage": 8.39, "elapsed_time": "3:15:03", "remaining_time": "1 day, 11:30:29", "throughput": 388.0, "total_tokens": 4540880}
688
+ {"current_steps": 3360, "total_steps": 40000, "loss": 5.9571, "lr": 0.2948103183449656, "epoch": 0.10754753216823507, "percentage": 8.4, "elapsed_time": "3:15:05", "remaining_time": "1 day, 11:27:24", "throughput": 388.5, "total_tokens": 4547520}
689
+ {"current_steps": 3365, "total_steps": 40000, "loss": 6.1462, "lr": 0.2947949468128435, "epoch": 0.10770757313872352, "percentage": 8.41, "elapsed_time": "3:15:07", "remaining_time": "1 day, 11:24:19", "throughput": 389.0, "total_tokens": 4554240}
690
+ {"current_steps": 3370, "total_steps": 40000, "loss": 6.0525, "lr": 0.2947795529515209, "epoch": 0.10786761410921196, "percentage": 8.43, "elapsed_time": "3:15:09", "remaining_time": "1 day, 11:21:15", "throughput": 389.52, "total_tokens": 4561040}
691
+ {"current_steps": 3375, "total_steps": 40000, "loss": 5.5309, "lr": 0.29476413676337193, "epoch": 0.1080276550797004, "percentage": 8.44, "elapsed_time": "3:15:11", "remaining_time": "1 day, 11:18:12", "throughput": 390.03, "total_tokens": 4567920}
692
+ {"current_steps": 3380, "total_steps": 40000, "loss": 5.2485, "lr": 0.2947486982507738, "epoch": 0.10818769605018885, "percentage": 8.45, "elapsed_time": "3:15:13", "remaining_time": "1 day, 11:15:09", "throughput": 390.54, "total_tokens": 4574592}
693
+ {"current_steps": 3385, "total_steps": 40000, "loss": 6.0799, "lr": 0.29473323741610735, "epoch": 0.10834773702067729, "percentage": 8.46, "elapsed_time": "3:15:15", "remaining_time": "1 day, 11:12:06", "throughput": 391.03, "total_tokens": 4581184}
694
+ {"current_steps": 3390, "total_steps": 40000, "loss": 6.0579, "lr": 0.2947177542617569, "epoch": 0.10850777799116573, "percentage": 8.48, "elapsed_time": "3:15:17", "remaining_time": "1 day, 11:09:04", "throughput": 391.54, "total_tokens": 4587968}
695
+ {"current_steps": 3395, "total_steps": 40000, "loss": 5.4002, "lr": 0.2947022487901101, "epoch": 0.10866781896165419, "percentage": 8.49, "elapsed_time": "3:15:19", "remaining_time": "1 day, 11:06:02", "throughput": 392.02, "total_tokens": 4594368}
696
+ {"current_steps": 3400, "total_steps": 40000, "loss": 6.1049, "lr": 0.2946867210035581, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:15:21", "remaining_time": "1 day, 11:03:02", "throughput": 392.58, "total_tokens": 4601728}
697
+ {"current_steps": 3400, "total_steps": 40000, "eval_loss": 5.739169597625732, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:26:06", "remaining_time": "1 day, 12:58:37", "throughput": 372.12, "total_tokens": 4601728}