rbelanec commited on
Commit
9e3dae3
verified
1 Parent(s): 8efb310

Training in progress, step 19240

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +193 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b67333603f23b0b76cffb29c42fcfd470165a07dcc0d61ba36e0374ef0069ae2
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f11761671c7a490cf9b30a236f6158d27b708ad1db53e644d08cb72659dec2
3
  size 798032
trainer_log.jsonl CHANGED
@@ -3674,3 +3674,196 @@
3674
  {"current_steps": 18278, "total_steps": 19240, "eval_loss": 0.145976260304451, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:47:03", "remaining_time": "0:02:28", "throughput": 1232.9, "total_tokens": 3480592}
3675
  {"current_steps": 18280, "total_steps": 19240, "loss": 0.0859, "lr": 3.7901896751541545e-07, "epoch": 9.5010395010395, "percentage": 95.01, "elapsed_time": "0:47:04", "remaining_time": "0:02:28", "throughput": 1232.56, "total_tokens": 3481008}
3676
  {"current_steps": 18285, "total_steps": 19240, "loss": 0.1885, "lr": 3.750950884300108e-07, "epoch": 9.503638253638254, "percentage": 95.04, "elapsed_time": "0:47:04", "remaining_time": "0:02:27", "throughput": 1232.58, "total_tokens": 3481968}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3674
  {"current_steps": 18278, "total_steps": 19240, "eval_loss": 0.145976260304451, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:47:03", "remaining_time": "0:02:28", "throughput": 1232.9, "total_tokens": 3480592}
3675
  {"current_steps": 18280, "total_steps": 19240, "loss": 0.0859, "lr": 3.7901896751541545e-07, "epoch": 9.5010395010395, "percentage": 95.01, "elapsed_time": "0:47:04", "remaining_time": "0:02:28", "throughput": 1232.56, "total_tokens": 3481008}
3676
  {"current_steps": 18285, "total_steps": 19240, "loss": 0.1885, "lr": 3.750950884300108e-07, "epoch": 9.503638253638254, "percentage": 95.04, "elapsed_time": "0:47:04", "remaining_time": "0:02:27", "throughput": 1232.58, "total_tokens": 3481968}
3677
+ {"current_steps": 18290, "total_steps": 19240, "loss": 0.0637, "lr": 3.71191473062571e-07, "epoch": 9.506237006237006, "percentage": 95.06, "elapsed_time": "0:47:05", "remaining_time": "0:02:26", "throughput": 1232.61, "total_tokens": 3482960}
3678
+ {"current_steps": 18295, "total_steps": 19240, "loss": 0.1556, "lr": 3.6730812462535404e-07, "epoch": 9.508835758835758, "percentage": 95.09, "elapsed_time": "0:47:06", "remaining_time": "0:02:25", "throughput": 1232.63, "total_tokens": 3483888}
3679
+ {"current_steps": 18300, "total_steps": 19240, "loss": 0.2641, "lr": 3.6344504631395934e-07, "epoch": 9.511434511434512, "percentage": 95.11, "elapsed_time": "0:47:07", "remaining_time": "0:02:25", "throughput": 1232.66, "total_tokens": 3484880}
3680
+ {"current_steps": 18305, "total_steps": 19240, "loss": 0.0942, "lr": 3.5960224130728857e-07, "epoch": 9.514033264033264, "percentage": 95.14, "elapsed_time": "0:47:07", "remaining_time": "0:02:24", "throughput": 1232.67, "total_tokens": 3485808}
3681
+ {"current_steps": 18310, "total_steps": 19240, "loss": 0.23, "lr": 3.5577971276757325e-07, "epoch": 9.516632016632016, "percentage": 95.17, "elapsed_time": "0:47:08", "remaining_time": "0:02:23", "throughput": 1232.69, "total_tokens": 3486736}
3682
+ {"current_steps": 18315, "total_steps": 19240, "loss": 0.0773, "lr": 3.519774638403472e-07, "epoch": 9.51923076923077, "percentage": 95.19, "elapsed_time": "0:47:09", "remaining_time": "0:02:22", "throughput": 1232.68, "total_tokens": 3487600}
3683
+ {"current_steps": 18320, "total_steps": 19240, "loss": 0.0677, "lr": 3.481954976544716e-07, "epoch": 9.521829521829522, "percentage": 95.22, "elapsed_time": "0:47:10", "remaining_time": "0:02:22", "throughput": 1232.69, "total_tokens": 3488528}
3684
+ {"current_steps": 18325, "total_steps": 19240, "loss": 0.1652, "lr": 3.44433817322104e-07, "epoch": 9.524428274428274, "percentage": 95.24, "elapsed_time": "0:47:10", "remaining_time": "0:02:21", "throughput": 1232.71, "total_tokens": 3489488}
3685
+ {"current_steps": 18330, "total_steps": 19240, "loss": 0.0938, "lr": 3.406924259387101e-07, "epoch": 9.527027027027026, "percentage": 95.27, "elapsed_time": "0:47:11", "remaining_time": "0:02:20", "throughput": 1232.72, "total_tokens": 3490384}
3686
+ {"current_steps": 18335, "total_steps": 19240, "loss": 0.2343, "lr": 3.369713265830715e-07, "epoch": 9.52962577962578, "percentage": 95.3, "elapsed_time": "0:47:12", "remaining_time": "0:02:19", "throughput": 1232.74, "total_tokens": 3491344}
3687
+ {"current_steps": 18340, "total_steps": 19240, "loss": 0.0825, "lr": 3.3327052231725276e-07, "epoch": 9.532224532224532, "percentage": 95.32, "elapsed_time": "0:47:12", "remaining_time": "0:02:19", "throughput": 1232.78, "total_tokens": 3492336}
3688
+ {"current_steps": 18345, "total_steps": 19240, "loss": 0.1382, "lr": 3.2959001618664e-07, "epoch": 9.534823284823284, "percentage": 95.35, "elapsed_time": "0:47:13", "remaining_time": "0:02:18", "throughput": 1232.8, "total_tokens": 3493296}
3689
+ {"current_steps": 18350, "total_steps": 19240, "loss": 0.1539, "lr": 3.2592981121989384e-07, "epoch": 9.537422037422038, "percentage": 95.37, "elapsed_time": "0:47:14", "remaining_time": "0:02:17", "throughput": 1232.83, "total_tokens": 3494256}
3690
+ {"current_steps": 18355, "total_steps": 19240, "loss": 0.1374, "lr": 3.222899104289856e-07, "epoch": 9.54002079002079, "percentage": 95.4, "elapsed_time": "0:47:15", "remaining_time": "0:02:16", "throughput": 1232.86, "total_tokens": 3495248}
3691
+ {"current_steps": 18360, "total_steps": 19240, "loss": 0.1407, "lr": 3.18670316809172e-07, "epoch": 9.542619542619542, "percentage": 95.43, "elapsed_time": "0:47:15", "remaining_time": "0:02:15", "throughput": 1232.87, "total_tokens": 3496176}
3692
+ {"current_steps": 18365, "total_steps": 19240, "loss": 0.1467, "lr": 3.150710333389983e-07, "epoch": 9.545218295218294, "percentage": 95.45, "elapsed_time": "0:47:16", "remaining_time": "0:02:15", "throughput": 1232.91, "total_tokens": 3497168}
3693
+ {"current_steps": 18370, "total_steps": 19240, "loss": 0.1275, "lr": 3.114920629802981e-07, "epoch": 9.547817047817048, "percentage": 95.48, "elapsed_time": "0:47:17", "remaining_time": "0:02:14", "throughput": 1232.93, "total_tokens": 3498128}
3694
+ {"current_steps": 18375, "total_steps": 19240, "loss": 0.046, "lr": 3.0793340867818763e-07, "epoch": 9.5504158004158, "percentage": 95.5, "elapsed_time": "0:47:17", "remaining_time": "0:02:13", "throughput": 1232.95, "total_tokens": 3499056}
3695
+ {"current_steps": 18380, "total_steps": 19240, "loss": 0.1966, "lr": 3.04395073361069e-07, "epoch": 9.553014553014552, "percentage": 95.53, "elapsed_time": "0:47:18", "remaining_time": "0:02:12", "throughput": 1233.01, "total_tokens": 3500112}
3696
+ {"current_steps": 18385, "total_steps": 19240, "loss": 0.1504, "lr": 3.008770599406213e-07, "epoch": 9.555613305613306, "percentage": 95.56, "elapsed_time": "0:47:19", "remaining_time": "0:02:12", "throughput": 1233.03, "total_tokens": 3501072}
3697
+ {"current_steps": 18390, "total_steps": 19240, "loss": 0.0824, "lr": 2.973793713118039e-07, "epoch": 9.558212058212058, "percentage": 95.58, "elapsed_time": "0:47:20", "remaining_time": "0:02:11", "throughput": 1233.05, "total_tokens": 3502032}
3698
+ {"current_steps": 18395, "total_steps": 19240, "loss": 0.1407, "lr": 2.9390201035284226e-07, "epoch": 9.56081081081081, "percentage": 95.61, "elapsed_time": "0:47:20", "remaining_time": "0:02:10", "throughput": 1233.1, "total_tokens": 3503056}
3699
+ {"current_steps": 18400, "total_steps": 19240, "loss": 0.1187, "lr": 2.904449799252418e-07, "epoch": 9.563409563409563, "percentage": 95.63, "elapsed_time": "0:47:21", "remaining_time": "0:02:09", "throughput": 1233.14, "total_tokens": 3504048}
3700
+ {"current_steps": 18405, "total_steps": 19240, "loss": 0.2303, "lr": 2.870082828737797e-07, "epoch": 9.566008316008316, "percentage": 95.66, "elapsed_time": "0:47:22", "remaining_time": "0:02:08", "throughput": 1233.16, "total_tokens": 3505008}
3701
+ {"current_steps": 18410, "total_steps": 19240, "loss": 0.1368, "lr": 2.8359192202649376e-07, "epoch": 9.568607068607069, "percentage": 95.69, "elapsed_time": "0:47:23", "remaining_time": "0:02:08", "throughput": 1233.17, "total_tokens": 3505936}
3702
+ {"current_steps": 18415, "total_steps": 19240, "loss": 0.1063, "lr": 2.8019590019469633e-07, "epoch": 9.57120582120582, "percentage": 95.71, "elapsed_time": "0:47:23", "remaining_time": "0:02:07", "throughput": 1233.21, "total_tokens": 3506928}
3703
+ {"current_steps": 18420, "total_steps": 19240, "loss": 0.1296, "lr": 2.7682022017295197e-07, "epoch": 9.573804573804575, "percentage": 95.74, "elapsed_time": "0:47:24", "remaining_time": "0:02:06", "throughput": 1233.24, "total_tokens": 3507920}
3704
+ {"current_steps": 18425, "total_steps": 19240, "loss": 0.1841, "lr": 2.734648847390997e-07, "epoch": 9.576403326403327, "percentage": 95.76, "elapsed_time": "0:47:25", "remaining_time": "0:02:05", "throughput": 1233.26, "total_tokens": 3508848}
3705
+ {"current_steps": 18430, "total_steps": 19240, "loss": 0.1619, "lr": 2.7012989665421706e-07, "epoch": 9.579002079002079, "percentage": 95.79, "elapsed_time": "0:47:25", "remaining_time": "0:02:05", "throughput": 1233.27, "total_tokens": 3509776}
3706
+ {"current_steps": 18435, "total_steps": 19240, "loss": 0.0535, "lr": 2.6681525866266157e-07, "epoch": 9.58160083160083, "percentage": 95.82, "elapsed_time": "0:47:26", "remaining_time": "0:02:04", "throughput": 1233.28, "total_tokens": 3510704}
3707
+ {"current_steps": 18440, "total_steps": 19240, "loss": 0.1335, "lr": 2.635209734920291e-07, "epoch": 9.584199584199585, "percentage": 95.84, "elapsed_time": "0:47:27", "remaining_time": "0:02:03", "throughput": 1233.3, "total_tokens": 3511632}
3708
+ {"current_steps": 18445, "total_steps": 19240, "loss": 0.0584, "lr": 2.602470438531679e-07, "epoch": 9.586798336798337, "percentage": 95.87, "elapsed_time": "0:47:28", "remaining_time": "0:02:02", "throughput": 1233.29, "total_tokens": 3512496}
3709
+ {"current_steps": 18450, "total_steps": 19240, "loss": 0.1572, "lr": 2.5699347244018404e-07, "epoch": 9.589397089397089, "percentage": 95.89, "elapsed_time": "0:47:28", "remaining_time": "0:02:01", "throughput": 1233.3, "total_tokens": 3513424}
3710
+ {"current_steps": 18455, "total_steps": 19240, "loss": 0.1832, "lr": 2.537602619304247e-07, "epoch": 9.591995841995843, "percentage": 95.92, "elapsed_time": "0:47:29", "remaining_time": "0:02:01", "throughput": 1233.32, "total_tokens": 3514352}
3711
+ {"current_steps": 18460, "total_steps": 19240, "loss": 0.2152, "lr": 2.5054741498448386e-07, "epoch": 9.594594594594595, "percentage": 95.95, "elapsed_time": "0:47:30", "remaining_time": "0:02:00", "throughput": 1233.35, "total_tokens": 3515344}
3712
+ {"current_steps": 18465, "total_steps": 19240, "loss": 0.1097, "lr": 2.4735493424619394e-07, "epoch": 9.597193347193347, "percentage": 95.97, "elapsed_time": "0:47:30", "remaining_time": "0:01:59", "throughput": 1233.35, "total_tokens": 3516240}
3713
+ {"current_steps": 18470, "total_steps": 19240, "loss": 0.3515, "lr": 2.4418282234263957e-07, "epoch": 9.5997920997921, "percentage": 96.0, "elapsed_time": "0:47:31", "remaining_time": "0:01:58", "throughput": 1233.39, "total_tokens": 3517232}
3714
+ {"current_steps": 18475, "total_steps": 19240, "loss": 0.2509, "lr": 2.410310818841299e-07, "epoch": 9.602390852390853, "percentage": 96.02, "elapsed_time": "0:47:32", "remaining_time": "0:01:58", "throughput": 1233.36, "total_tokens": 3518032}
3715
+ {"current_steps": 18480, "total_steps": 19240, "loss": 0.1548, "lr": 2.3789971546422374e-07, "epoch": 9.604989604989605, "percentage": 96.05, "elapsed_time": "0:47:33", "remaining_time": "0:01:57", "throughput": 1233.38, "total_tokens": 3518992}
3716
+ {"current_steps": 18485, "total_steps": 19240, "loss": 0.1366, "lr": 2.3478872565969867e-07, "epoch": 9.607588357588357, "percentage": 96.08, "elapsed_time": "0:47:33", "remaining_time": "0:01:56", "throughput": 1233.42, "total_tokens": 3519984}
3717
+ {"current_steps": 18490, "total_steps": 19240, "loss": 0.1376, "lr": 2.316981150305847e-07, "epoch": 9.61018711018711, "percentage": 96.1, "elapsed_time": "0:47:34", "remaining_time": "0:01:55", "throughput": 1233.42, "total_tokens": 3520880}
3718
+ {"current_steps": 18495, "total_steps": 19240, "loss": 0.1202, "lr": 2.2862788612012244e-07, "epoch": 9.612785862785863, "percentage": 96.13, "elapsed_time": "0:47:35", "remaining_time": "0:01:55", "throughput": 1233.41, "total_tokens": 3521744}
3719
+ {"current_steps": 18500, "total_steps": 19240, "loss": 0.1241, "lr": 2.255780414547909e-07, "epoch": 9.615384615384615, "percentage": 96.15, "elapsed_time": "0:47:36", "remaining_time": "0:01:54", "throughput": 1233.46, "total_tokens": 3522768}
3720
+ {"current_steps": 18505, "total_steps": 19240, "loss": 0.1096, "lr": 2.2254858354429364e-07, "epoch": 9.617983367983367, "percentage": 96.18, "elapsed_time": "0:47:36", "remaining_time": "0:01:53", "throughput": 1233.5, "total_tokens": 3523760}
3721
+ {"current_steps": 18510, "total_steps": 19240, "loss": 0.2221, "lr": 2.19539514881556e-07, "epoch": 9.620582120582121, "percentage": 96.21, "elapsed_time": "0:47:37", "remaining_time": "0:01:52", "throughput": 1233.52, "total_tokens": 3524720}
3722
+ {"current_steps": 18515, "total_steps": 19240, "loss": 0.0711, "lr": 2.165508379427278e-07, "epoch": 9.623180873180873, "percentage": 96.23, "elapsed_time": "0:47:38", "remaining_time": "0:01:51", "throughput": 1233.56, "total_tokens": 3525712}
3723
+ {"current_steps": 18520, "total_steps": 19240, "loss": 0.2272, "lr": 2.1358255518717786e-07, "epoch": 9.625779625779625, "percentage": 96.26, "elapsed_time": "0:47:38", "remaining_time": "0:01:51", "throughput": 1233.55, "total_tokens": 3526576}
3724
+ {"current_steps": 18525, "total_steps": 19240, "loss": 0.1241, "lr": 2.106346690574912e-07, "epoch": 9.628378378378379, "percentage": 96.28, "elapsed_time": "0:47:39", "remaining_time": "0:01:50", "throughput": 1233.56, "total_tokens": 3527504}
3725
+ {"current_steps": 18530, "total_steps": 19240, "loss": 0.1585, "lr": 2.0770718197946625e-07, "epoch": 9.630977130977131, "percentage": 96.31, "elapsed_time": "0:47:40", "remaining_time": "0:01:49", "throughput": 1233.61, "total_tokens": 3528528}
3726
+ {"current_steps": 18535, "total_steps": 19240, "loss": 0.1111, "lr": 2.0480009636212327e-07, "epoch": 9.633575883575883, "percentage": 96.34, "elapsed_time": "0:47:41", "remaining_time": "0:01:48", "throughput": 1233.61, "total_tokens": 3529424}
3727
+ {"current_steps": 18540, "total_steps": 19240, "loss": 0.1169, "lr": 2.0191341459768475e-07, "epoch": 9.636174636174637, "percentage": 96.36, "elapsed_time": "0:47:41", "remaining_time": "0:01:48", "throughput": 1233.65, "total_tokens": 3530416}
3728
+ {"current_steps": 18545, "total_steps": 19240, "loss": 0.085, "lr": 1.9904713906159224e-07, "epoch": 9.638773388773389, "percentage": 96.39, "elapsed_time": "0:47:42", "remaining_time": "0:01:47", "throughput": 1233.67, "total_tokens": 3531376}
3729
+ {"current_steps": 18550, "total_steps": 19240, "loss": 0.1707, "lr": 1.9620127211248672e-07, "epoch": 9.641372141372141, "percentage": 96.41, "elapsed_time": "0:47:43", "remaining_time": "0:01:46", "throughput": 1233.68, "total_tokens": 3532272}
3730
+ {"current_steps": 18555, "total_steps": 19240, "loss": 0.1602, "lr": 1.9337581609222277e-07, "epoch": 9.643970893970893, "percentage": 96.44, "elapsed_time": "0:47:43", "remaining_time": "0:01:45", "throughput": 1233.71, "total_tokens": 3533264}
3731
+ {"current_steps": 18560, "total_steps": 19240, "loss": 0.1006, "lr": 1.9057077332584883e-07, "epoch": 9.646569646569647, "percentage": 96.47, "elapsed_time": "0:47:44", "remaining_time": "0:01:44", "throughput": 1233.71, "total_tokens": 3534160}
3732
+ {"current_steps": 18565, "total_steps": 19240, "loss": 0.1361, "lr": 1.8778614612162404e-07, "epoch": 9.6491683991684, "percentage": 96.49, "elapsed_time": "0:47:45", "remaining_time": "0:01:44", "throughput": 1233.76, "total_tokens": 3535184}
3733
+ {"current_steps": 18570, "total_steps": 19240, "loss": 0.1566, "lr": 1.850219367710071e-07, "epoch": 9.651767151767151, "percentage": 96.52, "elapsed_time": "0:47:46", "remaining_time": "0:01:43", "throughput": 1233.78, "total_tokens": 3536144}
3734
+ {"current_steps": 18575, "total_steps": 19240, "loss": 0.1784, "lr": 1.8227814754865068e-07, "epoch": 9.654365904365905, "percentage": 96.54, "elapsed_time": "0:47:46", "remaining_time": "0:01:42", "throughput": 1233.81, "total_tokens": 3537104}
3735
+ {"current_steps": 18580, "total_steps": 19240, "loss": 0.1173, "lr": 1.7955478071240706e-07, "epoch": 9.656964656964657, "percentage": 96.57, "elapsed_time": "0:47:47", "remaining_time": "0:01:41", "throughput": 1233.86, "total_tokens": 3538128}
3736
+ {"current_steps": 18585, "total_steps": 19240, "loss": 0.1488, "lr": 1.7685183850331965e-07, "epoch": 9.65956340956341, "percentage": 96.6, "elapsed_time": "0:47:48", "remaining_time": "0:01:41", "throughput": 1233.87, "total_tokens": 3539056}
3737
+ {"current_steps": 18590, "total_steps": 19240, "loss": 0.0844, "lr": 1.7416932314562872e-07, "epoch": 9.662162162162161, "percentage": 96.62, "elapsed_time": "0:47:48", "remaining_time": "0:01:40", "throughput": 1233.88, "total_tokens": 3539984}
3738
+ {"current_steps": 18595, "total_steps": 19240, "loss": 0.192, "lr": 1.7150723684676572e-07, "epoch": 9.664760914760915, "percentage": 96.65, "elapsed_time": "0:47:49", "remaining_time": "0:01:39", "throughput": 1233.89, "total_tokens": 3540880}
3739
+ {"current_steps": 18600, "total_steps": 19240, "loss": 0.1514, "lr": 1.6886558179734225e-07, "epoch": 9.667359667359667, "percentage": 96.67, "elapsed_time": "0:47:50", "remaining_time": "0:01:38", "throughput": 1233.91, "total_tokens": 3541840}
3740
+ {"current_steps": 18605, "total_steps": 19240, "loss": 0.1234, "lr": 1.662443601711694e-07, "epoch": 9.66995841995842, "percentage": 96.7, "elapsed_time": "0:47:51", "remaining_time": "0:01:37", "throughput": 1233.93, "total_tokens": 3542768}
3741
+ {"current_steps": 18610, "total_steps": 19240, "loss": 0.1529, "lr": 1.6364357412523845e-07, "epoch": 9.672557172557173, "percentage": 96.73, "elapsed_time": "0:47:51", "remaining_time": "0:01:37", "throughput": 1233.94, "total_tokens": 3543696}
3742
+ {"current_steps": 18615, "total_steps": 19240, "loss": 0.2198, "lr": 1.6106322579972077e-07, "epoch": 9.675155925155925, "percentage": 96.75, "elapsed_time": "0:47:52", "remaining_time": "0:01:36", "throughput": 1233.98, "total_tokens": 3544688}
3743
+ {"current_steps": 18620, "total_steps": 19240, "loss": 0.177, "lr": 1.585033173179734e-07, "epoch": 9.677754677754677, "percentage": 96.78, "elapsed_time": "0:47:53", "remaining_time": "0:01:35", "throughput": 1233.99, "total_tokens": 3545616}
3744
+ {"current_steps": 18625, "total_steps": 19240, "loss": 0.0708, "lr": 1.5596385078653353e-07, "epoch": 9.68035343035343, "percentage": 96.8, "elapsed_time": "0:47:54", "remaining_time": "0:01:34", "throughput": 1234.01, "total_tokens": 3546576}
3745
+ {"current_steps": 18630, "total_steps": 19240, "loss": 0.2169, "lr": 1.5344482829511842e-07, "epoch": 9.682952182952183, "percentage": 96.83, "elapsed_time": "0:47:54", "remaining_time": "0:01:34", "throughput": 1234.0, "total_tokens": 3547440}
3746
+ {"current_steps": 18635, "total_steps": 19240, "loss": 0.1022, "lr": 1.5094625191661715e-07, "epoch": 9.685550935550935, "percentage": 96.86, "elapsed_time": "0:47:55", "remaining_time": "0:01:33", "throughput": 1234.03, "total_tokens": 3548400}
3747
+ {"current_steps": 18640, "total_steps": 19240, "loss": 0.0949, "lr": 1.4846812370709617e-07, "epoch": 9.688149688149688, "percentage": 96.88, "elapsed_time": "0:47:56", "remaining_time": "0:01:32", "throughput": 1234.06, "total_tokens": 3549392}
3748
+ {"current_steps": 18645, "total_steps": 19240, "loss": 0.1785, "lr": 1.4601044570579647e-07, "epoch": 9.690748440748441, "percentage": 96.91, "elapsed_time": "0:47:56", "remaining_time": "0:01:31", "throughput": 1234.11, "total_tokens": 3550416}
3749
+ {"current_steps": 18650, "total_steps": 19240, "loss": 0.2378, "lr": 1.4357321993513084e-07, "epoch": 9.693347193347194, "percentage": 96.93, "elapsed_time": "0:47:57", "remaining_time": "0:01:31", "throughput": 1234.15, "total_tokens": 3551408}
3750
+ {"current_steps": 18655, "total_steps": 19240, "loss": 0.1287, "lr": 1.4115644840067833e-07, "epoch": 9.695945945945946, "percentage": 96.96, "elapsed_time": "0:47:58", "remaining_time": "0:01:30", "throughput": 1234.01, "total_tokens": 3552304}
3751
+ {"current_steps": 18660, "total_steps": 19240, "loss": 0.1645, "lr": 1.3876013309118697e-07, "epoch": 9.698544698544698, "percentage": 96.99, "elapsed_time": "0:47:59", "remaining_time": "0:01:29", "throughput": 1234.05, "total_tokens": 3553296}
3752
+ {"current_steps": 18665, "total_steps": 19240, "loss": 0.1472, "lr": 1.363842759785794e-07, "epoch": 9.701143451143452, "percentage": 97.01, "elapsed_time": "0:48:00", "remaining_time": "0:01:28", "throughput": 1234.1, "total_tokens": 3554320}
3753
+ {"current_steps": 18670, "total_steps": 19240, "loss": 0.1313, "lr": 1.3402887901793338e-07, "epoch": 9.703742203742204, "percentage": 97.04, "elapsed_time": "0:48:00", "remaining_time": "0:01:27", "throughput": 1234.14, "total_tokens": 3555344}
3754
+ {"current_steps": 18675, "total_steps": 19240, "loss": 0.2033, "lr": 1.316939441474957e-07, "epoch": 9.706340956340956, "percentage": 97.06, "elapsed_time": "0:48:01", "remaining_time": "0:01:27", "throughput": 1234.19, "total_tokens": 3556368}
3755
+ {"current_steps": 18680, "total_steps": 19240, "loss": 0.1369, "lr": 1.2937947328867106e-07, "epoch": 9.70893970893971, "percentage": 97.09, "elapsed_time": "0:48:02", "remaining_time": "0:01:26", "throughput": 1234.22, "total_tokens": 3557360}
3756
+ {"current_steps": 18685, "total_steps": 19240, "loss": 0.1836, "lr": 1.270854683460304e-07, "epoch": 9.711538461538462, "percentage": 97.12, "elapsed_time": "0:48:02", "remaining_time": "0:01:25", "throughput": 1234.26, "total_tokens": 3558352}
3757
+ {"current_steps": 18690, "total_steps": 19240, "loss": 0.0751, "lr": 1.2481193120729427e-07, "epoch": 9.714137214137214, "percentage": 97.14, "elapsed_time": "0:48:03", "remaining_time": "0:01:24", "throughput": 1234.3, "total_tokens": 3559376}
3758
+ {"current_steps": 18695, "total_steps": 19240, "loss": 0.123, "lr": 1.2255886374334946e-07, "epoch": 9.716735966735968, "percentage": 97.17, "elapsed_time": "0:48:04", "remaining_time": "0:01:24", "throughput": 1234.32, "total_tokens": 3560336}
3759
+ {"current_steps": 18700, "total_steps": 19240, "loss": 0.1969, "lr": 1.203262678082323e-07, "epoch": 9.71933471933472, "percentage": 97.19, "elapsed_time": "0:48:05", "remaining_time": "0:01:23", "throughput": 1234.32, "total_tokens": 3561232}
3760
+ {"current_steps": 18705, "total_steps": 19240, "loss": 0.1962, "lr": 1.1811414523913711e-07, "epoch": 9.721933471933472, "percentage": 97.22, "elapsed_time": "0:48:05", "remaining_time": "0:01:22", "throughput": 1234.35, "total_tokens": 3562192}
3761
+ {"current_steps": 18710, "total_steps": 19240, "loss": 0.2642, "lr": 1.1592249785641052e-07, "epoch": 9.724532224532224, "percentage": 97.25, "elapsed_time": "0:48:06", "remaining_time": "0:01:21", "throughput": 1234.36, "total_tokens": 3563120}
3762
+ {"current_steps": 18715, "total_steps": 19240, "loss": 0.1003, "lr": 1.1375132746354322e-07, "epoch": 9.727130977130978, "percentage": 97.27, "elapsed_time": "0:48:07", "remaining_time": "0:01:20", "throughput": 1234.39, "total_tokens": 3564080}
3763
+ {"current_steps": 18720, "total_steps": 19240, "loss": 0.1054, "lr": 1.1160063584718661e-07, "epoch": 9.72972972972973, "percentage": 97.3, "elapsed_time": "0:48:08", "remaining_time": "0:01:20", "throughput": 1234.41, "total_tokens": 3565040}
3764
+ {"current_steps": 18725, "total_steps": 19240, "loss": 0.1825, "lr": 1.0947042477713332e-07, "epoch": 9.732328482328482, "percentage": 97.32, "elapsed_time": "0:48:08", "remaining_time": "0:01:19", "throughput": 1234.43, "total_tokens": 3565968}
3765
+ {"current_steps": 18730, "total_steps": 19240, "loss": 0.0682, "lr": 1.0736069600632281e-07, "epoch": 9.734927234927234, "percentage": 97.35, "elapsed_time": "0:48:09", "remaining_time": "0:01:18", "throughput": 1234.43, "total_tokens": 3566864}
3766
+ {"current_steps": 18735, "total_steps": 19240, "loss": 0.099, "lr": 1.0527145127084136e-07, "epoch": 9.737525987525988, "percentage": 97.38, "elapsed_time": "0:48:10", "remaining_time": "0:01:17", "throughput": 1234.4, "total_tokens": 3567664}
3767
+ {"current_steps": 18740, "total_steps": 19240, "loss": 0.1665, "lr": 1.032026922899193e-07, "epoch": 9.74012474012474, "percentage": 97.4, "elapsed_time": "0:48:10", "remaining_time": "0:01:17", "throughput": 1234.41, "total_tokens": 3568592}
3768
+ {"current_steps": 18745, "total_steps": 19240, "loss": 0.2079, "lr": 1.0115442076592541e-07, "epoch": 9.742723492723492, "percentage": 97.43, "elapsed_time": "0:48:11", "remaining_time": "0:01:16", "throughput": 1234.42, "total_tokens": 3569520}
3769
+ {"current_steps": 18750, "total_steps": 19240, "loss": 0.0977, "lr": 9.912663838437808e-08, "epoch": 9.745322245322246, "percentage": 97.45, "elapsed_time": "0:48:12", "remaining_time": "0:01:15", "throughput": 1234.44, "total_tokens": 3570480}
3770
+ {"current_steps": 18755, "total_steps": 19240, "loss": 0.0709, "lr": 9.711934681392587e-08, "epoch": 9.747920997920998, "percentage": 97.48, "elapsed_time": "0:48:13", "remaining_time": "0:01:14", "throughput": 1234.46, "total_tokens": 3571408}
3771
+ {"current_steps": 18760, "total_steps": 19240, "loss": 0.0792, "lr": 9.513254770636137e-08, "epoch": 9.75051975051975, "percentage": 97.51, "elapsed_time": "0:48:13", "remaining_time": "0:01:14", "throughput": 1234.47, "total_tokens": 3572336}
3772
+ {"current_steps": 18765, "total_steps": 19240, "loss": 0.0866, "lr": 9.31662426966129e-08, "epoch": 9.753118503118504, "percentage": 97.53, "elapsed_time": "0:48:14", "remaining_time": "0:01:13", "throughput": 1234.46, "total_tokens": 3573200}
3773
+ {"current_steps": 18770, "total_steps": 19240, "loss": 0.1524, "lr": 9.122043340273889e-08, "epoch": 9.755717255717256, "percentage": 97.56, "elapsed_time": "0:48:15", "remaining_time": "0:01:12", "throughput": 1234.48, "total_tokens": 3574160}
3774
+ {"current_steps": 18775, "total_steps": 19240, "loss": 0.1843, "lr": 8.929512142594187e-08, "epoch": 9.758316008316008, "percentage": 97.58, "elapsed_time": "0:48:15", "remaining_time": "0:01:11", "throughput": 1234.51, "total_tokens": 3575120}
3775
+ {"current_steps": 18780, "total_steps": 19240, "loss": 0.1473, "lr": 8.739030835055173e-08, "epoch": 9.76091476091476, "percentage": 97.61, "elapsed_time": "0:48:16", "remaining_time": "0:01:10", "throughput": 1234.51, "total_tokens": 3576016}
3776
+ {"current_steps": 18785, "total_steps": 19240, "loss": 0.1883, "lr": 8.550599574402574e-08, "epoch": 9.763513513513514, "percentage": 97.64, "elapsed_time": "0:48:17", "remaining_time": "0:01:10", "throughput": 1234.55, "total_tokens": 3577008}
3777
+ {"current_steps": 18790, "total_steps": 19240, "loss": 0.1806, "lr": 8.364218515695965e-08, "epoch": 9.766112266112266, "percentage": 97.66, "elapsed_time": "0:48:18", "remaining_time": "0:01:09", "throughput": 1234.57, "total_tokens": 3577968}
3778
+ {"current_steps": 18795, "total_steps": 19240, "loss": 0.1103, "lr": 8.179887812307386e-08, "epoch": 9.768711018711018, "percentage": 97.69, "elapsed_time": "0:48:18", "remaining_time": "0:01:08", "throughput": 1234.57, "total_tokens": 3578864}
3779
+ {"current_steps": 18800, "total_steps": 19240, "loss": 0.1879, "lr": 7.99760761592161e-08, "epoch": 9.771309771309772, "percentage": 97.71, "elapsed_time": "0:48:19", "remaining_time": "0:01:07", "throughput": 1234.6, "total_tokens": 3579824}
3780
+ {"current_steps": 18805, "total_steps": 19240, "loss": 0.2112, "lr": 7.817378076536153e-08, "epoch": 9.773908523908524, "percentage": 97.74, "elapsed_time": "0:48:20", "remaining_time": "0:01:07", "throughput": 1234.63, "total_tokens": 3580816}
3781
+ {"current_steps": 18810, "total_steps": 19240, "loss": 0.1194, "lr": 7.63919934246099e-08, "epoch": 9.776507276507276, "percentage": 97.77, "elapsed_time": "0:48:21", "remaining_time": "0:01:06", "throughput": 1234.66, "total_tokens": 3581776}
3782
+ {"current_steps": 18815, "total_steps": 19240, "loss": 0.1668, "lr": 7.463071560318835e-08, "epoch": 9.779106029106028, "percentage": 97.79, "elapsed_time": "0:48:21", "remaining_time": "0:01:05", "throughput": 1234.68, "total_tokens": 3582736}
3783
+ {"current_steps": 18820, "total_steps": 19240, "loss": 0.1391, "lr": 7.288994875044308e-08, "epoch": 9.781704781704782, "percentage": 97.82, "elapsed_time": "0:48:22", "remaining_time": "0:01:04", "throughput": 1234.68, "total_tokens": 3583632}
3784
+ {"current_steps": 18825, "total_steps": 19240, "loss": 0.1092, "lr": 7.116969429883935e-08, "epoch": 9.784303534303534, "percentage": 97.84, "elapsed_time": "0:48:23", "remaining_time": "0:01:04", "throughput": 1234.73, "total_tokens": 3584656}
3785
+ {"current_steps": 18830, "total_steps": 19240, "loss": 0.1545, "lr": 6.946995366397257e-08, "epoch": 9.786902286902286, "percentage": 97.87, "elapsed_time": "0:48:23", "remaining_time": "0:01:03", "throughput": 1234.74, "total_tokens": 3585584}
3786
+ {"current_steps": 18835, "total_steps": 19240, "loss": 0.0904, "lr": 6.779072824454614e-08, "epoch": 9.78950103950104, "percentage": 97.9, "elapsed_time": "0:48:24", "remaining_time": "0:01:02", "throughput": 1234.75, "total_tokens": 3586512}
3787
+ {"current_steps": 18840, "total_steps": 19240, "loss": 0.217, "lr": 6.6132019422388e-08, "epoch": 9.792099792099792, "percentage": 97.92, "elapsed_time": "0:48:25", "remaining_time": "0:01:01", "throughput": 1234.77, "total_tokens": 3587440}
3788
+ {"current_steps": 18845, "total_steps": 19240, "loss": 0.1826, "lr": 6.449382856244246e-08, "epoch": 9.794698544698544, "percentage": 97.95, "elapsed_time": "0:48:26", "remaining_time": "0:01:00", "throughput": 1234.78, "total_tokens": 3588368}
3789
+ {"current_steps": 18850, "total_steps": 19240, "loss": 0.1242, "lr": 6.287615701277005e-08, "epoch": 9.797297297297296, "percentage": 97.97, "elapsed_time": "0:48:26", "remaining_time": "0:01:00", "throughput": 1234.8, "total_tokens": 3589328}
3790
+ {"current_steps": 18855, "total_steps": 19240, "loss": 0.1695, "lr": 6.127900610454207e-08, "epoch": 9.79989604989605, "percentage": 98.0, "elapsed_time": "0:48:27", "remaining_time": "0:00:59", "throughput": 1234.84, "total_tokens": 3590320}
3791
+ {"current_steps": 18860, "total_steps": 19240, "loss": 0.1013, "lr": 5.970237715204885e-08, "epoch": 9.802494802494802, "percentage": 98.02, "elapsed_time": "0:48:28", "remaining_time": "0:00:58", "throughput": 1234.86, "total_tokens": 3591280}
3792
+ {"current_steps": 18865, "total_steps": 19240, "loss": 0.1092, "lr": 5.814627145269147e-08, "epoch": 9.805093555093555, "percentage": 98.05, "elapsed_time": "0:48:28", "remaining_time": "0:00:57", "throughput": 1234.9, "total_tokens": 3592272}
3793
+ {"current_steps": 18870, "total_steps": 19240, "loss": 0.1682, "lr": 5.661069028697896e-08, "epoch": 9.807692307692308, "percentage": 98.08, "elapsed_time": "0:48:29", "remaining_time": "0:00:57", "throughput": 1234.89, "total_tokens": 3593136}
3794
+ {"current_steps": 18875, "total_steps": 19240, "loss": 0.1996, "lr": 5.509563491853942e-08, "epoch": 9.81029106029106, "percentage": 98.1, "elapsed_time": "0:48:30", "remaining_time": "0:00:56", "throughput": 1234.91, "total_tokens": 3594096}
3795
+ {"current_steps": 18880, "total_steps": 19240, "loss": 0.1309, "lr": 5.3601106594097784e-08, "epoch": 9.812889812889813, "percentage": 98.13, "elapsed_time": "0:48:31", "remaining_time": "0:00:55", "throughput": 1234.94, "total_tokens": 3595056}
3796
+ {"current_steps": 18885, "total_steps": 19240, "loss": 0.1019, "lr": 5.2127106543498063e-08, "epoch": 9.815488565488565, "percentage": 98.15, "elapsed_time": "0:48:31", "remaining_time": "0:00:54", "throughput": 1234.95, "total_tokens": 3595984}
3797
+ {"current_steps": 18890, "total_steps": 19240, "loss": 0.1081, "lr": 5.0673635979686665e-08, "epoch": 9.818087318087318, "percentage": 98.18, "elapsed_time": "0:48:32", "remaining_time": "0:00:53", "throughput": 1234.95, "total_tokens": 3596880}
3798
+ {"current_steps": 18895, "total_steps": 19240, "loss": 0.0659, "lr": 4.924069609872073e-08, "epoch": 9.82068607068607, "percentage": 98.21, "elapsed_time": "0:48:33", "remaining_time": "0:00:53", "throughput": 1234.97, "total_tokens": 3597808}
3799
+ {"current_steps": 18900, "total_steps": 19240, "loss": 0.122, "lr": 4.7828288079757035e-08, "epoch": 9.823284823284823, "percentage": 98.23, "elapsed_time": "0:48:34", "remaining_time": "0:00:52", "throughput": 1234.99, "total_tokens": 3598768}
3800
+ {"current_steps": 18905, "total_steps": 19240, "loss": 0.1074, "lr": 4.643641308505753e-08, "epoch": 9.825883575883577, "percentage": 98.26, "elapsed_time": "0:48:34", "remaining_time": "0:00:51", "throughput": 1234.99, "total_tokens": 3599664}
3801
+ {"current_steps": 18910, "total_steps": 19240, "loss": 0.2222, "lr": 4.50650722599949e-08, "epoch": 9.828482328482329, "percentage": 98.28, "elapsed_time": "0:48:35", "remaining_time": "0:00:50", "throughput": 1235.01, "total_tokens": 3600592}
3802
+ {"current_steps": 18915, "total_steps": 19240, "loss": 0.1791, "lr": 4.3714266733035914e-08, "epoch": 9.83108108108108, "percentage": 98.31, "elapsed_time": "0:48:36", "remaining_time": "0:00:50", "throughput": 1235.03, "total_tokens": 3601552}
3803
+ {"current_steps": 18920, "total_steps": 19240, "loss": 0.2204, "lr": 4.238399761574974e-08, "epoch": 9.833679833679835, "percentage": 98.34, "elapsed_time": "0:48:36", "remaining_time": "0:00:49", "throughput": 1235.05, "total_tokens": 3602512}
3804
+ {"current_steps": 18925, "total_steps": 19240, "loss": 0.0948, "lr": 4.10742660028135e-08, "epoch": 9.836278586278587, "percentage": 98.36, "elapsed_time": "0:48:37", "remaining_time": "0:00:48", "throughput": 1235.08, "total_tokens": 3603472}
3805
+ {"current_steps": 18930, "total_steps": 19240, "loss": 0.1072, "lr": 3.978507297199285e-08, "epoch": 9.838877338877339, "percentage": 98.39, "elapsed_time": "0:48:38", "remaining_time": "0:00:47", "throughput": 1235.09, "total_tokens": 3604400}
3806
+ {"current_steps": 18935, "total_steps": 19240, "loss": 0.1202, "lr": 3.851641958416696e-08, "epoch": 9.84147609147609, "percentage": 98.41, "elapsed_time": "0:48:39", "remaining_time": "0:00:47", "throughput": 1235.12, "total_tokens": 3605360}
3807
+ {"current_steps": 18940, "total_steps": 19240, "loss": 0.1556, "lr": 3.7268306883297966e-08, "epoch": 9.844074844074845, "percentage": 98.44, "elapsed_time": "0:48:39", "remaining_time": "0:00:46", "throughput": 1235.17, "total_tokens": 3606384}
3808
+ {"current_steps": 18945, "total_steps": 19240, "loss": 0.2043, "lr": 3.604073589645596e-08, "epoch": 9.846673596673597, "percentage": 98.47, "elapsed_time": "0:48:40", "remaining_time": "0:00:45", "throughput": 1235.18, "total_tokens": 3607312}
3809
+ {"current_steps": 18950, "total_steps": 19240, "loss": 0.2799, "lr": 3.4833707633799565e-08, "epoch": 9.849272349272349, "percentage": 98.49, "elapsed_time": "0:48:41", "remaining_time": "0:00:44", "throughput": 1235.16, "total_tokens": 3608144}
3810
+ {"current_steps": 18955, "total_steps": 19240, "loss": 0.1193, "lr": 3.3647223088589805e-08, "epoch": 9.851871101871101, "percentage": 98.52, "elapsed_time": "0:48:41", "remaining_time": "0:00:43", "throughput": 1235.19, "total_tokens": 3609104}
3811
+ {"current_steps": 18960, "total_steps": 19240, "loss": 0.0994, "lr": 3.248128323717625e-08, "epoch": 9.854469854469855, "percentage": 98.54, "elapsed_time": "0:48:42", "remaining_time": "0:00:43", "throughput": 1235.22, "total_tokens": 3610096}
3812
+ {"current_steps": 18965, "total_steps": 19240, "loss": 0.0855, "lr": 3.133588903900808e-08, "epoch": 9.857068607068607, "percentage": 98.57, "elapsed_time": "0:48:43", "remaining_time": "0:00:42", "throughput": 1235.25, "total_tokens": 3611056}
3813
+ {"current_steps": 18970, "total_steps": 19240, "loss": 0.2762, "lr": 3.021104143662301e-08, "epoch": 9.859667359667359, "percentage": 98.6, "elapsed_time": "0:48:44", "remaining_time": "0:00:41", "throughput": 1235.27, "total_tokens": 3612016}
3814
+ {"current_steps": 18975, "total_steps": 19240, "loss": 0.0773, "lr": 2.910674135565561e-08, "epoch": 9.862266112266113, "percentage": 98.62, "elapsed_time": "0:48:44", "remaining_time": "0:00:40", "throughput": 1235.29, "total_tokens": 3612944}
3815
+ {"current_steps": 18980, "total_steps": 19240, "loss": 0.1463, "lr": 2.8022989704826196e-08, "epoch": 9.864864864864865, "percentage": 98.65, "elapsed_time": "0:48:45", "remaining_time": "0:00:40", "throughput": 1235.32, "total_tokens": 3613936}
3816
+ {"current_steps": 18985, "total_steps": 19240, "loss": 0.2657, "lr": 2.6959787375949174e-08, "epoch": 9.867463617463617, "percentage": 98.67, "elapsed_time": "0:48:46", "remaining_time": "0:00:39", "throughput": 1235.32, "total_tokens": 3614832}
3817
+ {"current_steps": 18990, "total_steps": 19240, "loss": 0.0911, "lr": 2.5917135243930245e-08, "epoch": 9.87006237006237, "percentage": 98.7, "elapsed_time": "0:48:46", "remaining_time": "0:00:38", "throughput": 1235.36, "total_tokens": 3615824}
3818
+ {"current_steps": 18995, "total_steps": 19240, "loss": 0.129, "lr": 2.4895034166760865e-08, "epoch": 9.872661122661123, "percentage": 98.73, "elapsed_time": "0:48:47", "remaining_time": "0:00:37", "throughput": 1235.36, "total_tokens": 3616720}
3819
+ {"current_steps": 19000, "total_steps": 19240, "loss": 0.1295, "lr": 2.389348498552657e-08, "epoch": 9.875259875259875, "percentage": 98.75, "elapsed_time": "0:48:48", "remaining_time": "0:00:36", "throughput": 1235.41, "total_tokens": 3617744}
3820
+ {"current_steps": 19005, "total_steps": 19240, "loss": 0.1245, "lr": 2.2912488524393095e-08, "epoch": 9.877858627858627, "percentage": 98.78, "elapsed_time": "0:48:49", "remaining_time": "0:00:36", "throughput": 1235.44, "total_tokens": 3618736}
3821
+ {"current_steps": 19010, "total_steps": 19240, "loss": 0.1321, "lr": 2.1952045590620253e-08, "epoch": 9.880457380457381, "percentage": 98.8, "elapsed_time": "0:48:49", "remaining_time": "0:00:35", "throughput": 1235.46, "total_tokens": 3619664}
3822
+ {"current_steps": 19015, "total_steps": 19240, "loss": 0.133, "lr": 2.101215697455361e-08, "epoch": 9.883056133056133, "percentage": 98.83, "elapsed_time": "0:48:50", "remaining_time": "0:00:34", "throughput": 1235.49, "total_tokens": 3620656}
3823
+ {"current_steps": 19020, "total_steps": 19240, "loss": 0.2742, "lr": 2.0092823449618935e-08, "epoch": 9.885654885654885, "percentage": 98.86, "elapsed_time": "0:48:51", "remaining_time": "0:00:33", "throughput": 1235.53, "total_tokens": 3621648}
3824
+ {"current_steps": 19025, "total_steps": 19240, "loss": 0.1386, "lr": 1.9194045772336077e-08, "epoch": 9.888253638253639, "percentage": 98.88, "elapsed_time": "0:48:51", "remaining_time": "0:00:33", "throughput": 1235.54, "total_tokens": 3622576}
3825
+ {"current_steps": 19030, "total_steps": 19240, "loss": 0.1066, "lr": 1.831582468229953e-08, "epoch": 9.890852390852391, "percentage": 98.91, "elapsed_time": "0:48:52", "remaining_time": "0:00:32", "throughput": 1235.57, "total_tokens": 3623536}
3826
+ {"current_steps": 19035, "total_steps": 19240, "loss": 0.1831, "lr": 1.7458160902197872e-08, "epoch": 9.893451143451143, "percentage": 98.93, "elapsed_time": "0:48:53", "remaining_time": "0:00:31", "throughput": 1235.62, "total_tokens": 3624592}
3827
+ {"current_steps": 19040, "total_steps": 19240, "loss": 0.0859, "lr": 1.6621055137797105e-08, "epoch": 9.896049896049895, "percentage": 98.96, "elapsed_time": "0:48:54", "remaining_time": "0:00:30", "throughput": 1235.64, "total_tokens": 3625520}
3828
+ {"current_steps": 19045, "total_steps": 19240, "loss": 0.1854, "lr": 1.5804508077946202e-08, "epoch": 9.89864864864865, "percentage": 98.99, "elapsed_time": "0:48:54", "remaining_time": "0:00:30", "throughput": 1235.66, "total_tokens": 3626480}
3829
+ {"current_steps": 19050, "total_steps": 19240, "loss": 0.1209, "lr": 1.500852039458267e-08, "epoch": 9.901247401247401, "percentage": 99.01, "elapsed_time": "0:48:55", "remaining_time": "0:00:29", "throughput": 1235.68, "total_tokens": 3627408}
3830
+ {"current_steps": 19055, "total_steps": 19240, "loss": 0.1319, "lr": 1.4233092742713116e-08, "epoch": 9.903846153846153, "percentage": 99.04, "elapsed_time": "0:48:56", "remaining_time": "0:00:28", "throughput": 1235.69, "total_tokens": 3628336}
3831
+ {"current_steps": 19060, "total_steps": 19240, "loss": 0.1695, "lr": 1.3478225760441e-08, "epoch": 9.906444906444907, "percentage": 99.06, "elapsed_time": "0:48:57", "remaining_time": "0:00:27", "throughput": 1235.68, "total_tokens": 3629200}
3832
+ {"current_steps": 19065, "total_steps": 19240, "loss": 0.0999, "lr": 1.2743920068938874e-08, "epoch": 9.90904365904366, "percentage": 99.09, "elapsed_time": "0:48:57", "remaining_time": "0:00:26", "throughput": 1235.72, "total_tokens": 3630192}
3833
+ {"current_steps": 19070, "total_steps": 19240, "loss": 0.1278, "lr": 1.203017627246228e-08, "epoch": 9.911642411642411, "percentage": 99.12, "elapsed_time": "0:48:58", "remaining_time": "0:00:26", "throughput": 1235.77, "total_tokens": 3631248}
3834
+ {"current_steps": 19075, "total_steps": 19240, "loss": 0.1757, "lr": 1.1336994958349723e-08, "epoch": 9.914241164241163, "percentage": 99.14, "elapsed_time": "0:48:59", "remaining_time": "0:00:25", "throughput": 1235.79, "total_tokens": 3632176}
3835
+ {"current_steps": 19080, "total_steps": 19240, "loss": 0.103, "lr": 1.0664376697017142e-08, "epoch": 9.916839916839917, "percentage": 99.17, "elapsed_time": "0:48:59", "remaining_time": "0:00:24", "throughput": 1235.81, "total_tokens": 3633136}
3836
+ {"current_steps": 19085, "total_steps": 19240, "loss": 0.1741, "lr": 1.0012322041960676e-08, "epoch": 9.91943866943867, "percentage": 99.19, "elapsed_time": "0:49:00", "remaining_time": "0:00:23", "throughput": 1235.81, "total_tokens": 3634032}
3837
+ {"current_steps": 19090, "total_steps": 19240, "loss": 0.2007, "lr": 9.38083152974556e-09, "epoch": 9.922037422037421, "percentage": 99.22, "elapsed_time": "0:49:01", "remaining_time": "0:00:23", "throughput": 1235.84, "total_tokens": 3634992}
3838
+ {"current_steps": 19095, "total_steps": 19240, "loss": 0.0878, "lr": 8.76990568003111e-09, "epoch": 9.924636174636175, "percentage": 99.25, "elapsed_time": "0:49:02", "remaining_time": "0:00:22", "throughput": 1235.85, "total_tokens": 3635920}
3839
+ {"current_steps": 19100, "total_steps": 19240, "loss": 0.1618, "lr": 8.17954499554019e-09, "epoch": 9.927234927234927, "percentage": 99.27, "elapsed_time": "0:49:02", "remaining_time": "0:00:21", "throughput": 1235.85, "total_tokens": 3636816}
3840
+ {"current_steps": 19105, "total_steps": 19240, "loss": 0.1528, "lr": 7.609749962081413e-09, "epoch": 9.92983367983368, "percentage": 99.3, "elapsed_time": "0:49:03", "remaining_time": "0:00:20", "throughput": 1235.87, "total_tokens": 3637744}
3841
+ {"current_steps": 19110, "total_steps": 19240, "loss": 0.0899, "lr": 7.060521048532498e-09, "epoch": 9.932432432432432, "percentage": 99.32, "elapsed_time": "0:49:04", "remaining_time": "0:00:20", "throughput": 1235.87, "total_tokens": 3638640}
3842
+ {"current_steps": 19115, "total_steps": 19240, "loss": 0.2687, "lr": 6.5318587068541325e-09, "epoch": 9.935031185031185, "percentage": 99.35, "elapsed_time": "0:49:04", "remaining_time": "0:00:19", "throughput": 1235.9, "total_tokens": 3639632}
3843
+ {"current_steps": 19120, "total_steps": 19240, "loss": 0.232, "lr": 6.023763372076108e-09, "epoch": 9.937629937629938, "percentage": 99.38, "elapsed_time": "0:49:05", "remaining_time": "0:00:18", "throughput": 1235.93, "total_tokens": 3640592}
3844
+ {"current_steps": 19125, "total_steps": 19240, "loss": 0.1589, "lr": 5.536235462313965e-09, "epoch": 9.94022869022869, "percentage": 99.4, "elapsed_time": "0:49:06", "remaining_time": "0:00:17", "throughput": 1235.94, "total_tokens": 3641520}
3845
+ {"current_steps": 19130, "total_steps": 19240, "loss": 0.1459, "lr": 5.069275378746796e-09, "epoch": 9.942827442827443, "percentage": 99.43, "elapsed_time": "0:49:07", "remaining_time": "0:00:16", "throughput": 1235.95, "total_tokens": 3642448}
3846
+ {"current_steps": 19135, "total_steps": 19240, "loss": 0.1641, "lr": 4.622883505636666e-09, "epoch": 9.945426195426196, "percentage": 99.45, "elapsed_time": "0:49:07", "remaining_time": "0:00:16", "throughput": 1235.95, "total_tokens": 3643312}
3847
+ {"current_steps": 19140, "total_steps": 19240, "loss": 0.1036, "lr": 4.197060210317516e-09, "epoch": 9.948024948024948, "percentage": 99.48, "elapsed_time": "0:49:08", "remaining_time": "0:00:15", "throughput": 1235.95, "total_tokens": 3644208}
3848
+ {"current_steps": 19145, "total_steps": 19240, "loss": 0.2335, "lr": 3.791805843195162e-09, "epoch": 9.950623700623701, "percentage": 99.51, "elapsed_time": "0:49:09", "remaining_time": "0:00:14", "throughput": 1235.96, "total_tokens": 3645136}
3849
+ {"current_steps": 19150, "total_steps": 19240, "loss": 0.1219, "lr": 3.4071207377500693e-09, "epoch": 9.953222453222454, "percentage": 99.53, "elapsed_time": "0:49:09", "remaining_time": "0:00:13", "throughput": 1236.0, "total_tokens": 3646128}
3850
+ {"current_steps": 19155, "total_steps": 19240, "loss": 0.0624, "lr": 3.043005210542904e-09, "epoch": 9.955821205821206, "percentage": 99.56, "elapsed_time": "0:49:10", "remaining_time": "0:00:13", "throughput": 1236.04, "total_tokens": 3647152}
3851
+ {"current_steps": 19160, "total_steps": 19240, "loss": 0.0956, "lr": 2.6994595612006566e-09, "epoch": 9.958419958419958, "percentage": 99.58, "elapsed_time": "0:49:11", "remaining_time": "0:00:12", "throughput": 1236.04, "total_tokens": 3648016}
3852
+ {"current_steps": 19165, "total_steps": 19240, "loss": 0.1511, "lr": 2.376484072424967e-09, "epoch": 9.961018711018712, "percentage": 99.61, "elapsed_time": "0:49:12", "remaining_time": "0:00:11", "throughput": 1236.08, "total_tokens": 3649040}
3853
+ {"current_steps": 19170, "total_steps": 19240, "loss": 0.0904, "lr": 2.074079009989349e-09, "epoch": 9.963617463617464, "percentage": 99.64, "elapsed_time": "0:49:12", "remaining_time": "0:00:10", "throughput": 1236.09, "total_tokens": 3649968}
3854
+ {"current_steps": 19175, "total_steps": 19240, "loss": 0.0866, "lr": 1.7922446227447432e-09, "epoch": 9.966216216216216, "percentage": 99.66, "elapsed_time": "0:49:13", "remaining_time": "0:00:10", "throughput": 1236.11, "total_tokens": 3650896}
3855
+ {"current_steps": 19180, "total_steps": 19240, "loss": 0.1672, "lr": 1.5309811426056364e-09, "epoch": 9.96881496881497, "percentage": 99.69, "elapsed_time": "0:49:14", "remaining_time": "0:00:09", "throughput": 1236.14, "total_tokens": 3651888}
3856
+ {"current_steps": 19185, "total_steps": 19240, "loss": 0.1165, "lr": 1.2902887845722688e-09, "epoch": 9.971413721413722, "percentage": 99.71, "elapsed_time": "0:49:14", "remaining_time": "0:00:08", "throughput": 1236.15, "total_tokens": 3652816}
3857
+ {"current_steps": 19190, "total_steps": 19240, "loss": 0.1222, "lr": 1.070167746702877e-09, "epoch": 9.974012474012474, "percentage": 99.74, "elapsed_time": "0:49:15", "remaining_time": "0:00:07", "throughput": 1236.17, "total_tokens": 3653776}
3858
+ {"current_steps": 19195, "total_steps": 19240, "loss": 0.1784, "lr": 8.70618210138674e-10, "epoch": 9.976611226611226, "percentage": 99.77, "elapsed_time": "0:49:16", "remaining_time": "0:00:06", "throughput": 1236.23, "total_tokens": 3654832}
3859
+ {"current_steps": 19200, "total_steps": 19240, "loss": 0.1812, "lr": 6.916403390844206e-10, "epoch": 9.97920997920998, "percentage": 99.79, "elapsed_time": "0:49:17", "remaining_time": "0:00:06", "throughput": 1236.27, "total_tokens": 3655856}
3860
+ {"current_steps": 19205, "total_steps": 19240, "loss": 0.1354, "lr": 5.332342808223034e-10, "epoch": 9.981808731808732, "percentage": 99.82, "elapsed_time": "0:49:17", "remaining_time": "0:00:05", "throughput": 1236.29, "total_tokens": 3656784}
3861
+ {"current_steps": 19210, "total_steps": 19240, "loss": 0.1918, "lr": 3.9540016570083215e-10, "epoch": 9.984407484407484, "percentage": 99.84, "elapsed_time": "0:49:18", "remaining_time": "0:00:04", "throughput": 1236.33, "total_tokens": 3657808}
3862
+ {"current_steps": 19215, "total_steps": 19240, "loss": 0.1779, "lr": 2.7813810714871767e-10, "epoch": 9.987006237006238, "percentage": 99.87, "elapsed_time": "0:49:19", "remaining_time": "0:00:03", "throughput": 1236.35, "total_tokens": 3658736}
3863
+ {"current_steps": 19220, "total_steps": 19240, "loss": 0.1785, "lr": 1.8144820165544307e-10, "epoch": 9.98960498960499, "percentage": 99.9, "elapsed_time": "0:49:20", "remaining_time": "0:00:03", "throughput": 1236.35, "total_tokens": 3659632}
3864
+ {"current_steps": 19225, "total_steps": 19240, "loss": 0.1213, "lr": 1.0533052878791694e-10, "epoch": 9.992203742203742, "percentage": 99.92, "elapsed_time": "0:49:20", "remaining_time": "0:00:02", "throughput": 1236.35, "total_tokens": 3660528}
3865
+ {"current_steps": 19230, "total_steps": 19240, "loss": 0.1208, "lr": 4.978515118214677e-11, "epoch": 9.994802494802494, "percentage": 99.95, "elapsed_time": "0:49:21", "remaining_time": "0:00:01", "throughput": 1236.36, "total_tokens": 3661456}
3866
+ {"current_steps": 19235, "total_steps": 19240, "loss": 0.1426, "lr": 1.4812114548790057e-11, "epoch": 9.997401247401248, "percentage": 99.97, "elapsed_time": "0:49:22", "remaining_time": "0:00:00", "throughput": 1236.39, "total_tokens": 3662416}
3867
+ {"current_steps": 19240, "total_steps": 19240, "loss": 0.1007, "lr": 4.114476648275911e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:49:22", "remaining_time": "0:00:00", "throughput": 1236.39, "total_tokens": 3663392}
3868
+ {"current_steps": 19240, "total_steps": 19240, "eval_loss": 0.14546528458595276, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:49:30", "remaining_time": "0:00:00", "throughput": 1233.11, "total_tokens": 3663392}
3869
+ {"current_steps": 19240, "total_steps": 19240, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:49:31", "remaining_time": "0:00:00", "throughput": 1232.77, "total_tokens": 3663392}