rbelanec commited on
Commit
6164474
·
verified ·
1 Parent(s): 3ca9179

Training in progress, step 19080

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +191 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43fedbb214a085eadafb8c624b66df43d587898fb77f2a32fd95a5490de9eecd
3
  size 819328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e32c6084f62ab55e3d105f4d89423c744fb5b12c44b74c764a46afdc0bcea21
3
  size 819328
trainer_log.jsonl CHANGED
@@ -3644,3 +3644,194 @@
3644
  {"current_steps": 18126, "total_steps": 19080, "eval_loss": 0.49346524477005005, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "1:22:13", "remaining_time": "0:04:19", "throughput": 2401.06, "total_tokens": 11845704}
3645
  {"current_steps": 18130, "total_steps": 19080, "loss": 0.4939, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:22:15", "remaining_time": "0:04:18", "throughput": 2400.49, "total_tokens": 11847912}
3646
  {"current_steps": 18135, "total_steps": 19080, "loss": 0.3838, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:22:17", "remaining_time": "0:04:17", "throughput": 2400.75, "total_tokens": 11853192}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3644
  {"current_steps": 18126, "total_steps": 19080, "eval_loss": 0.49346524477005005, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "1:22:13", "remaining_time": "0:04:19", "throughput": 2401.06, "total_tokens": 11845704}
3645
  {"current_steps": 18130, "total_steps": 19080, "loss": 0.4939, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:22:15", "remaining_time": "0:04:18", "throughput": 2400.49, "total_tokens": 11847912}
3646
  {"current_steps": 18135, "total_steps": 19080, "loss": 0.3838, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:22:17", "remaining_time": "0:04:17", "throughput": 2400.75, "total_tokens": 11853192}
3647
+ {"current_steps": 18140, "total_steps": 19080, "loss": 0.5661, "lr": 3.6955099370666045e-07, "epoch": 9.50733752620545, "percentage": 95.07, "elapsed_time": "1:22:18", "remaining_time": "0:04:15", "throughput": 2400.85, "total_tokens": 11856808}
3648
+ {"current_steps": 18145, "total_steps": 19080, "loss": 0.7272, "lr": 3.656437875113522e-07, "epoch": 9.509958071278826, "percentage": 95.1, "elapsed_time": "1:22:19", "remaining_time": "0:04:14", "throughput": 2400.95, "total_tokens": 11860360}
3649
+ {"current_steps": 18150, "total_steps": 19080, "loss": 0.3784, "lr": 3.617571942200693e-07, "epoch": 9.5125786163522, "percentage": 95.13, "elapsed_time": "1:22:21", "remaining_time": "0:04:13", "throughput": 2400.94, "total_tokens": 11863048}
3650
+ {"current_steps": 18155, "total_steps": 19080, "loss": 0.4726, "lr": 3.5789121708493523e-07, "epoch": 9.515199161425576, "percentage": 95.15, "elapsed_time": "1:22:22", "remaining_time": "0:04:11", "throughput": 2401.08, "total_tokens": 11867016}
3651
+ {"current_steps": 18160, "total_steps": 19080, "loss": 0.4712, "lr": 3.5404585934082635e-07, "epoch": 9.517819706498951, "percentage": 95.18, "elapsed_time": "1:22:23", "remaining_time": "0:04:10", "throughput": 2401.2, "total_tokens": 11870728}
3652
+ {"current_steps": 18165, "total_steps": 19080, "loss": 0.5026, "lr": 3.502211242053577e-07, "epoch": 9.520440251572326, "percentage": 95.2, "elapsed_time": "1:22:24", "remaining_time": "0:04:09", "throughput": 2401.26, "total_tokens": 11873928}
3653
+ {"current_steps": 18170, "total_steps": 19080, "loss": 0.4329, "lr": 3.4641701487889697e-07, "epoch": 9.523060796645701, "percentage": 95.23, "elapsed_time": "1:22:26", "remaining_time": "0:04:07", "throughput": 2401.29, "total_tokens": 11876968}
3654
+ {"current_steps": 18175, "total_steps": 19080, "loss": 0.4659, "lr": 3.4263353454454806e-07, "epoch": 9.525681341719078, "percentage": 95.26, "elapsed_time": "1:22:27", "remaining_time": "0:04:06", "throughput": 2401.41, "total_tokens": 11880648}
3655
+ {"current_steps": 18180, "total_steps": 19080, "loss": 0.4903, "lr": 3.3887068636815346e-07, "epoch": 9.528301886792454, "percentage": 95.28, "elapsed_time": "1:22:28", "remaining_time": "0:04:04", "throughput": 2401.44, "total_tokens": 11883560}
3656
+ {"current_steps": 18185, "total_steps": 19080, "loss": 0.5316, "lr": 3.351284734982918e-07, "epoch": 9.530922431865829, "percentage": 95.31, "elapsed_time": "1:22:29", "remaining_time": "0:04:03", "throughput": 2401.68, "total_tokens": 11888296}
3657
+ {"current_steps": 18190, "total_steps": 19080, "loss": 0.4649, "lr": 3.3140689906628054e-07, "epoch": 9.533542976939204, "percentage": 95.34, "elapsed_time": "1:22:31", "remaining_time": "0:04:02", "throughput": 2401.9, "total_tokens": 11892872}
3658
+ {"current_steps": 18195, "total_steps": 19080, "loss": 0.4385, "lr": 3.2770596618615645e-07, "epoch": 9.536163522012579, "percentage": 95.36, "elapsed_time": "1:22:32", "remaining_time": "0:04:00", "throughput": 2401.94, "total_tokens": 11895944}
3659
+ {"current_steps": 18200, "total_steps": 19080, "loss": 0.5187, "lr": 3.240256779546952e-07, "epoch": 9.538784067085954, "percentage": 95.39, "elapsed_time": "1:22:33", "remaining_time": "0:03:59", "throughput": 2401.95, "total_tokens": 11898696}
3660
+ {"current_steps": 18205, "total_steps": 19080, "loss": 0.3989, "lr": 3.2036603745139447e-07, "epoch": 9.54140461215933, "percentage": 95.41, "elapsed_time": "1:22:35", "remaining_time": "0:03:58", "throughput": 2402.28, "total_tokens": 11904744}
3661
+ {"current_steps": 18210, "total_steps": 19080, "loss": 0.4408, "lr": 3.167270477384743e-07, "epoch": 9.544025157232705, "percentage": 95.44, "elapsed_time": "1:22:36", "remaining_time": "0:03:56", "throughput": 2402.3, "total_tokens": 11907656}
3662
+ {"current_steps": 18215, "total_steps": 19080, "loss": 0.5663, "lr": 3.1310871186086834e-07, "epoch": 9.54664570230608, "percentage": 95.47, "elapsed_time": "1:22:37", "remaining_time": "0:03:55", "throughput": 2402.31, "total_tokens": 11910504}
3663
+ {"current_steps": 18220, "total_steps": 19080, "loss": 0.5552, "lr": 3.095110328462464e-07, "epoch": 9.549266247379455, "percentage": 95.49, "elapsed_time": "1:22:39", "remaining_time": "0:03:54", "throughput": 2402.33, "total_tokens": 11913192}
3664
+ {"current_steps": 18225, "total_steps": 19080, "loss": 0.6196, "lr": 3.0593401370497264e-07, "epoch": 9.55188679245283, "percentage": 95.52, "elapsed_time": "1:22:40", "remaining_time": "0:03:52", "throughput": 2402.4, "total_tokens": 11916680}
3665
+ {"current_steps": 18230, "total_steps": 19080, "loss": 0.4302, "lr": 3.0237765743013626e-07, "epoch": 9.554507337526205, "percentage": 95.55, "elapsed_time": "1:22:41", "remaining_time": "0:03:51", "throughput": 2402.44, "total_tokens": 11919656}
3666
+ {"current_steps": 18235, "total_steps": 19080, "loss": 0.4515, "lr": 2.9884196699753453e-07, "epoch": 9.55712788259958, "percentage": 95.57, "elapsed_time": "1:22:42", "remaining_time": "0:03:49", "throughput": 2402.47, "total_tokens": 11922632}
3667
+ {"current_steps": 18240, "total_steps": 19080, "loss": 0.4998, "lr": 2.953269453656704e-07, "epoch": 9.559748427672956, "percentage": 95.6, "elapsed_time": "1:22:43", "remaining_time": "0:03:48", "throughput": 2402.59, "total_tokens": 11926280}
3668
+ {"current_steps": 18245, "total_steps": 19080, "loss": 0.3326, "lr": 2.9183259547575504e-07, "epoch": 9.56236897274633, "percentage": 95.62, "elapsed_time": "1:22:45", "remaining_time": "0:03:47", "throughput": 2402.68, "total_tokens": 11929768}
3669
+ {"current_steps": 18250, "total_steps": 19080, "loss": 0.4544, "lr": 2.883589202517023e-07, "epoch": 9.564989517819706, "percentage": 95.65, "elapsed_time": "1:22:46", "remaining_time": "0:03:45", "throughput": 2402.8, "total_tokens": 11933480}
3670
+ {"current_steps": 18255, "total_steps": 19080, "loss": 0.397, "lr": 2.849059226001177e-07, "epoch": 9.567610062893081, "percentage": 95.68, "elapsed_time": "1:22:47", "remaining_time": "0:03:44", "throughput": 2402.81, "total_tokens": 11936200}
3671
+ {"current_steps": 18260, "total_steps": 19080, "loss": 0.574, "lr": 2.8147360541032065e-07, "epoch": 9.570230607966456, "percentage": 95.7, "elapsed_time": "1:22:48", "remaining_time": "0:03:43", "throughput": 2402.76, "total_tokens": 11938472}
3672
+ {"current_steps": 18265, "total_steps": 19080, "loss": 0.5555, "lr": 2.780619715543109e-07, "epoch": 9.572851153039831, "percentage": 95.73, "elapsed_time": "1:22:49", "remaining_time": "0:03:41", "throughput": 2402.89, "total_tokens": 11942280}
3673
+ {"current_steps": 18270, "total_steps": 19080, "loss": 0.4291, "lr": 2.746710238867911e-07, "epoch": 9.575471698113208, "percentage": 95.75, "elapsed_time": "1:22:51", "remaining_time": "0:03:40", "throughput": 2402.98, "total_tokens": 11945800}
3674
+ {"current_steps": 18275, "total_steps": 19080, "loss": 0.4556, "lr": 2.713007652451499e-07, "epoch": 9.578092243186584, "percentage": 95.78, "elapsed_time": "1:22:52", "remaining_time": "0:03:39", "throughput": 2402.94, "total_tokens": 11948200}
3675
+ {"current_steps": 18280, "total_steps": 19080, "loss": 0.5133, "lr": 2.6795119844946757e-07, "epoch": 9.580712788259959, "percentage": 95.81, "elapsed_time": "1:22:53", "remaining_time": "0:03:37", "throughput": 2403.02, "total_tokens": 11951656}
3676
+ {"current_steps": 18285, "total_steps": 19080, "loss": 0.4866, "lr": 2.646223263025077e-07, "epoch": 9.583333333333334, "percentage": 95.83, "elapsed_time": "1:22:54", "remaining_time": "0:03:36", "throughput": 2403.1, "total_tokens": 11955208}
3677
+ {"current_steps": 18290, "total_steps": 19080, "loss": 0.3932, "lr": 2.6131415158971993e-07, "epoch": 9.585953878406709, "percentage": 95.86, "elapsed_time": "1:22:56", "remaining_time": "0:03:34", "throughput": 2403.08, "total_tokens": 11957768}
3678
+ {"current_steps": 18295, "total_steps": 19080, "loss": 0.457, "lr": 2.5802667707922887e-07, "epoch": 9.588574423480084, "percentage": 95.89, "elapsed_time": "1:22:57", "remaining_time": "0:03:33", "throughput": 2403.09, "total_tokens": 11960552}
3679
+ {"current_steps": 18300, "total_steps": 19080, "loss": 0.5494, "lr": 2.54759905521848e-07, "epoch": 9.59119496855346, "percentage": 95.91, "elapsed_time": "1:22:58", "remaining_time": "0:03:32", "throughput": 2403.23, "total_tokens": 11964552}
3680
+ {"current_steps": 18305, "total_steps": 19080, "loss": 0.4582, "lr": 2.51513839651063e-07, "epoch": 9.593815513626835, "percentage": 95.94, "elapsed_time": "1:22:59", "remaining_time": "0:03:30", "throughput": 2403.18, "total_tokens": 11966824}
3681
+ {"current_steps": 18310, "total_steps": 19080, "loss": 0.4692, "lr": 2.4828848218302615e-07, "epoch": 9.59643605870021, "percentage": 95.96, "elapsed_time": "1:23:00", "remaining_time": "0:03:29", "throughput": 2403.22, "total_tokens": 11969832}
3682
+ {"current_steps": 18315, "total_steps": 19080, "loss": 0.4238, "lr": 2.450838358165786e-07, "epoch": 9.599056603773585, "percentage": 95.99, "elapsed_time": "1:23:01", "remaining_time": "0:03:28", "throughput": 2403.2, "total_tokens": 11972424}
3683
+ {"current_steps": 18320, "total_steps": 19080, "loss": 0.4993, "lr": 2.41899903233217e-07, "epoch": 9.60167714884696, "percentage": 96.02, "elapsed_time": "1:23:03", "remaining_time": "0:03:26", "throughput": 2403.35, "total_tokens": 11976552}
3684
+ {"current_steps": 18325, "total_steps": 19080, "loss": 0.4614, "lr": 2.387366870971103e-07, "epoch": 9.604297693920335, "percentage": 96.04, "elapsed_time": "1:23:04", "remaining_time": "0:03:25", "throughput": 2403.39, "total_tokens": 11979720}
3685
+ {"current_steps": 18330, "total_steps": 19080, "loss": 0.5552, "lr": 2.3559419005509675e-07, "epoch": 9.60691823899371, "percentage": 96.07, "elapsed_time": "1:23:05", "remaining_time": "0:03:23", "throughput": 2403.41, "total_tokens": 11982536}
3686
+ {"current_steps": 18335, "total_steps": 19080, "loss": 0.3758, "lr": 2.3247241473667026e-07, "epoch": 9.609538784067086, "percentage": 96.1, "elapsed_time": "1:23:06", "remaining_time": "0:03:22", "throughput": 2403.43, "total_tokens": 11985384}
3687
+ {"current_steps": 18340, "total_steps": 19080, "loss": 0.4599, "lr": 2.2937136375399126e-07, "epoch": 9.61215932914046, "percentage": 96.12, "elapsed_time": "1:23:08", "remaining_time": "0:03:21", "throughput": 2403.5, "total_tokens": 11988712}
3688
+ {"current_steps": 18345, "total_steps": 19080, "loss": 0.4535, "lr": 2.2629103970188137e-07, "epoch": 9.614779874213836, "percentage": 96.15, "elapsed_time": "1:23:09", "remaining_time": "0:03:19", "throughput": 2403.7, "total_tokens": 11993352}
3689
+ {"current_steps": 18350, "total_steps": 19080, "loss": 0.4869, "lr": 2.2323144515780935e-07, "epoch": 9.617400419287211, "percentage": 96.17, "elapsed_time": "1:23:10", "remaining_time": "0:03:18", "throughput": 2403.68, "total_tokens": 11995848}
3690
+ {"current_steps": 18355, "total_steps": 19080, "loss": 0.5491, "lr": 2.201925826819079e-07, "epoch": 9.620020964360586, "percentage": 96.2, "elapsed_time": "1:23:11", "remaining_time": "0:03:17", "throughput": 2403.77, "total_tokens": 11999336}
3691
+ {"current_steps": 18360, "total_steps": 19080, "loss": 0.499, "lr": 2.1717445481695408e-07, "epoch": 9.622641509433961, "percentage": 96.23, "elapsed_time": "1:23:13", "remaining_time": "0:03:15", "throughput": 2403.87, "total_tokens": 12003080}
3692
+ {"current_steps": 18365, "total_steps": 19080, "loss": 0.3016, "lr": 2.1417706408838333e-07, "epoch": 9.625262054507338, "percentage": 96.25, "elapsed_time": "1:23:14", "remaining_time": "0:03:14", "throughput": 2404.02, "total_tokens": 12007240}
3693
+ {"current_steps": 18370, "total_steps": 19080, "loss": 0.5111, "lr": 2.112004130042755e-07, "epoch": 9.627882599580714, "percentage": 96.28, "elapsed_time": "1:23:15", "remaining_time": "0:03:13", "throughput": 2404.03, "total_tokens": 12009928}
3694
+ {"current_steps": 18375, "total_steps": 19080, "loss": 0.4439, "lr": 2.082445040553549e-07, "epoch": 9.630503144654089, "percentage": 96.31, "elapsed_time": "1:23:17", "remaining_time": "0:03:11", "throughput": 2404.11, "total_tokens": 12013384}
3695
+ {"current_steps": 18380, "total_steps": 19080, "loss": 0.5225, "lr": 2.053093397149902e-07, "epoch": 9.633123689727464, "percentage": 96.33, "elapsed_time": "1:23:18", "remaining_time": "0:03:10", "throughput": 2404.16, "total_tokens": 12016520}
3696
+ {"current_steps": 18385, "total_steps": 19080, "loss": 0.4522, "lr": 2.0239492243919467e-07, "epoch": 9.635744234800839, "percentage": 96.36, "elapsed_time": "1:23:19", "remaining_time": "0:03:08", "throughput": 2404.26, "total_tokens": 12020040}
3697
+ {"current_steps": 18390, "total_steps": 19080, "loss": 0.4685, "lr": 1.9950125466662028e-07, "epoch": 9.638364779874214, "percentage": 96.38, "elapsed_time": "1:23:20", "remaining_time": "0:03:07", "throughput": 2404.34, "total_tokens": 12023464}
3698
+ {"current_steps": 18395, "total_steps": 19080, "loss": 0.3587, "lr": 1.9662833881855248e-07, "epoch": 9.64098532494759, "percentage": 96.41, "elapsed_time": "1:23:21", "remaining_time": "0:03:06", "throughput": 2404.39, "total_tokens": 12026664}
3699
+ {"current_steps": 18400, "total_steps": 19080, "loss": 0.4043, "lr": 1.9377617729891828e-07, "epoch": 9.643605870020965, "percentage": 96.44, "elapsed_time": "1:23:23", "remaining_time": "0:03:04", "throughput": 2404.52, "total_tokens": 12030440}
3700
+ {"current_steps": 18405, "total_steps": 19080, "loss": 0.514, "lr": 1.9094477249427534e-07, "epoch": 9.64622641509434, "percentage": 96.46, "elapsed_time": "1:23:24", "remaining_time": "0:03:03", "throughput": 2404.49, "total_tokens": 12032872}
3701
+ {"current_steps": 18410, "total_steps": 19080, "loss": 0.41, "lr": 1.8813412677381737e-07, "epoch": 9.648846960167715, "percentage": 96.49, "elapsed_time": "1:23:25", "remaining_time": "0:03:02", "throughput": 2404.73, "total_tokens": 12037864}
3702
+ {"current_steps": 18415, "total_steps": 19080, "loss": 0.3506, "lr": 1.8534424248935756e-07, "epoch": 9.65146750524109, "percentage": 96.51, "elapsed_time": "1:23:27", "remaining_time": "0:03:00", "throughput": 2404.76, "total_tokens": 12040904}
3703
+ {"current_steps": 18420, "total_steps": 19080, "loss": 0.4699, "lr": 1.8257512197535076e-07, "epoch": 9.654088050314465, "percentage": 96.54, "elapsed_time": "1:23:28", "remaining_time": "0:02:59", "throughput": 2404.84, "total_tokens": 12044296}
3704
+ {"current_steps": 18425, "total_steps": 19080, "loss": 0.3848, "lr": 1.7982676754886574e-07, "epoch": 9.65670859538784, "percentage": 96.57, "elapsed_time": "1:23:29", "remaining_time": "0:02:58", "throughput": 2404.88, "total_tokens": 12047208}
3705
+ {"current_steps": 18430, "total_steps": 19080, "loss": 0.5437, "lr": 1.7709918150959904e-07, "epoch": 9.659329140461216, "percentage": 96.59, "elapsed_time": "1:23:30", "remaining_time": "0:02:56", "throughput": 2404.96, "total_tokens": 12050696}
3706
+ {"current_steps": 18435, "total_steps": 19080, "loss": 0.5252, "lr": 1.7439236613987775e-07, "epoch": 9.66194968553459, "percentage": 96.62, "elapsed_time": "1:23:32", "remaining_time": "0:02:55", "throughput": 2405.07, "total_tokens": 12054536}
3707
+ {"current_steps": 18440, "total_steps": 19080, "loss": 0.5061, "lr": 1.717063237046318e-07, "epoch": 9.664570230607966, "percentage": 96.65, "elapsed_time": "1:23:33", "remaining_time": "0:02:53", "throughput": 2405.02, "total_tokens": 12056776}
3708
+ {"current_steps": 18445, "total_steps": 19080, "loss": 0.5389, "lr": 1.6904105645142444e-07, "epoch": 9.667190775681341, "percentage": 96.67, "elapsed_time": "1:23:34", "remaining_time": "0:02:52", "throughput": 2405.04, "total_tokens": 12059720}
3709
+ {"current_steps": 18450, "total_steps": 19080, "loss": 0.4766, "lr": 1.6639656661043e-07, "epoch": 9.669811320754716, "percentage": 96.7, "elapsed_time": "1:23:35", "remaining_time": "0:02:51", "throughput": 2405.14, "total_tokens": 12063304}
3710
+ {"current_steps": 18455, "total_steps": 19080, "loss": 0.5299, "lr": 1.6377285639443407e-07, "epoch": 9.672431865828091, "percentage": 96.72, "elapsed_time": "1:23:37", "remaining_time": "0:02:49", "throughput": 2405.32, "total_tokens": 12067592}
3711
+ {"current_steps": 18460, "total_steps": 19080, "loss": 0.4822, "lr": 1.61169927998836e-07, "epoch": 9.675052410901468, "percentage": 96.75, "elapsed_time": "1:23:38", "remaining_time": "0:02:48", "throughput": 2405.38, "total_tokens": 12070856}
3712
+ {"current_steps": 18465, "total_steps": 19080, "loss": 0.5095, "lr": 1.5858778360165195e-07, "epoch": 9.677672955974844, "percentage": 96.78, "elapsed_time": "1:23:39", "remaining_time": "0:02:47", "throughput": 2405.46, "total_tokens": 12074280}
3713
+ {"current_steps": 18470, "total_steps": 19080, "loss": 0.4208, "lr": 1.5602642536350075e-07, "epoch": 9.680293501048219, "percentage": 96.8, "elapsed_time": "1:23:40", "remaining_time": "0:02:45", "throughput": 2405.5, "total_tokens": 12077288}
3714
+ {"current_steps": 18475, "total_steps": 19080, "loss": 0.404, "lr": 1.5348585542760974e-07, "epoch": 9.682914046121594, "percentage": 96.83, "elapsed_time": "1:23:42", "remaining_time": "0:02:44", "throughput": 2405.7, "total_tokens": 12082056}
3715
+ {"current_steps": 18480, "total_steps": 19080, "loss": 0.5608, "lr": 1.5096607591980894e-07, "epoch": 9.685534591194969, "percentage": 96.86, "elapsed_time": "1:23:43", "remaining_time": "0:02:43", "throughput": 2405.75, "total_tokens": 12085128}
3716
+ {"current_steps": 18485, "total_steps": 19080, "loss": 0.6203, "lr": 1.4846708894853955e-07, "epoch": 9.688155136268344, "percentage": 96.88, "elapsed_time": "1:23:44", "remaining_time": "0:02:41", "throughput": 2405.88, "total_tokens": 12089032}
3717
+ {"current_steps": 18490, "total_steps": 19080, "loss": 0.5655, "lr": 1.459888966048373e-07, "epoch": 9.69077568134172, "percentage": 96.91, "elapsed_time": "1:23:45", "remaining_time": "0:02:40", "throughput": 2405.86, "total_tokens": 12091496}
3718
+ {"current_steps": 18495, "total_steps": 19080, "loss": 0.587, "lr": 1.4353150096234058e-07, "epoch": 9.693396226415095, "percentage": 96.93, "elapsed_time": "1:23:47", "remaining_time": "0:02:39", "throughput": 2405.97, "total_tokens": 12095208}
3719
+ {"current_steps": 18500, "total_steps": 19080, "loss": 0.5365, "lr": 1.410949040772852e-07, "epoch": 9.69601677148847, "percentage": 96.96, "elapsed_time": "1:23:48", "remaining_time": "0:02:37", "throughput": 2406.11, "total_tokens": 12099176}
3720
+ {"current_steps": 18505, "total_steps": 19080, "loss": 0.5739, "lr": 1.3867910798850692e-07, "epoch": 9.698637316561845, "percentage": 96.99, "elapsed_time": "1:23:49", "remaining_time": "0:02:36", "throughput": 2406.17, "total_tokens": 12102408}
3721
+ {"current_steps": 18510, "total_steps": 19080, "loss": 0.5749, "lr": 1.3628411471742764e-07, "epoch": 9.70125786163522, "percentage": 97.01, "elapsed_time": "1:23:50", "remaining_time": "0:02:34", "throughput": 2406.24, "total_tokens": 12105704}
3722
+ {"current_steps": 18515, "total_steps": 19080, "loss": 0.5614, "lr": 1.3390992626807485e-07, "epoch": 9.703878406708595, "percentage": 97.04, "elapsed_time": "1:23:52", "remaining_time": "0:02:33", "throughput": 2406.26, "total_tokens": 12108520}
3723
+ {"current_steps": 18520, "total_steps": 19080, "loss": 0.4575, "lr": 1.315565446270567e-07, "epoch": 9.70649895178197, "percentage": 97.06, "elapsed_time": "1:23:53", "remaining_time": "0:02:32", "throughput": 2406.34, "total_tokens": 12111912}
3724
+ {"current_steps": 18525, "total_steps": 19080, "loss": 0.4493, "lr": 1.292239717635785e-07, "epoch": 9.709119496855346, "percentage": 97.09, "elapsed_time": "1:23:54", "remaining_time": "0:02:30", "throughput": 2406.48, "total_tokens": 12116040}
3725
+ {"current_steps": 18530, "total_steps": 19080, "loss": 0.4364, "lr": 1.269122096294262e-07, "epoch": 9.71174004192872, "percentage": 97.12, "elapsed_time": "1:23:55", "remaining_time": "0:02:29", "throughput": 2406.46, "total_tokens": 12118632}
3726
+ {"current_steps": 18535, "total_steps": 19080, "loss": 0.3493, "lr": 1.24621260158983e-07, "epoch": 9.714360587002096, "percentage": 97.14, "elapsed_time": "1:23:56", "remaining_time": "0:02:28", "throughput": 2406.45, "total_tokens": 12121192}
3727
+ {"current_steps": 18540, "total_steps": 19080, "loss": 0.4641, "lr": 1.2235112526920723e-07, "epoch": 9.716981132075471, "percentage": 97.17, "elapsed_time": "1:23:58", "remaining_time": "0:02:26", "throughput": 2406.45, "total_tokens": 12123976}
3728
+ {"current_steps": 18545, "total_steps": 19080, "loss": 0.6101, "lr": 1.2010180685964324e-07, "epoch": 9.719601677148846, "percentage": 97.2, "elapsed_time": "1:23:59", "remaining_time": "0:02:25", "throughput": 2406.58, "total_tokens": 12127816}
3729
+ {"current_steps": 18550, "total_steps": 19080, "loss": 0.5649, "lr": 1.1787330681241881e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "1:24:00", "remaining_time": "0:02:24", "throughput": 2406.73, "total_tokens": 12131848}
3730
+ {"current_steps": 18555, "total_steps": 19080, "loss": 0.4704, "lr": 1.156656269922396e-07, "epoch": 9.724842767295598, "percentage": 97.25, "elapsed_time": "1:24:02", "remaining_time": "0:02:22", "throughput": 2406.82, "total_tokens": 12135432}
3731
+ {"current_steps": 18560, "total_steps": 19080, "loss": 0.5224, "lr": 1.1347876924639455e-07, "epoch": 9.727463312368974, "percentage": 97.27, "elapsed_time": "1:24:03", "remaining_time": "0:02:21", "throughput": 2406.85, "total_tokens": 12138376}
3732
+ {"current_steps": 18565, "total_steps": 19080, "loss": 0.5892, "lr": 1.1131273540474496e-07, "epoch": 9.730083857442349, "percentage": 97.3, "elapsed_time": "1:24:04", "remaining_time": "0:02:19", "throughput": 2406.89, "total_tokens": 12141480}
3733
+ {"current_steps": 18570, "total_steps": 19080, "loss": 0.4339, "lr": 1.091675272797299e-07, "epoch": 9.732704402515724, "percentage": 97.33, "elapsed_time": "1:24:05", "remaining_time": "0:02:18", "throughput": 2406.89, "total_tokens": 12144168}
3734
+ {"current_steps": 18575, "total_steps": 19080, "loss": 0.5897, "lr": 1.0704314666635795e-07, "epoch": 9.735324947589099, "percentage": 97.35, "elapsed_time": "1:24:06", "remaining_time": "0:02:17", "throughput": 2407.04, "total_tokens": 12148168}
3735
+ {"current_steps": 18580, "total_steps": 19080, "loss": 0.3555, "lr": 1.0493959534221832e-07, "epoch": 9.737945492662474, "percentage": 97.38, "elapsed_time": "1:24:08", "remaining_time": "0:02:15", "throughput": 2407.02, "total_tokens": 12150696}
3736
+ {"current_steps": 18585, "total_steps": 19080, "loss": 0.3857, "lr": 1.0285687506746133e-07, "epoch": 9.74056603773585, "percentage": 97.41, "elapsed_time": "1:24:09", "remaining_time": "0:02:14", "throughput": 2407.09, "total_tokens": 12153928}
3737
+ {"current_steps": 18590, "total_steps": 19080, "loss": 0.4415, "lr": 1.0079498758481798e-07, "epoch": 9.743186582809225, "percentage": 97.43, "elapsed_time": "1:24:10", "remaining_time": "0:02:13", "throughput": 2407.18, "total_tokens": 12157384}
3738
+ {"current_steps": 18595, "total_steps": 19080, "loss": 0.4572, "lr": 9.87539346195776e-08, "epoch": 9.7458071278826, "percentage": 97.46, "elapsed_time": "1:24:11", "remaining_time": "0:02:11", "throughput": 2407.2, "total_tokens": 12160200}
3739
+ {"current_steps": 18600, "total_steps": 19080, "loss": 0.6747, "lr": 9.673371787960183e-08, "epoch": 9.748427672955975, "percentage": 97.48, "elapsed_time": "1:24:12", "remaining_time": "0:02:10", "throughput": 2407.16, "total_tokens": 12162504}
3740
+ {"current_steps": 18605, "total_steps": 19080, "loss": 0.4962, "lr": 9.473433905531626e-08, "epoch": 9.75104821802935, "percentage": 97.51, "elapsed_time": "1:24:13", "remaining_time": "0:02:09", "throughput": 2407.16, "total_tokens": 12165288}
3741
+ {"current_steps": 18610, "total_steps": 19080, "loss": 0.4674, "lr": 9.275579981970483e-08, "epoch": 9.753668763102725, "percentage": 97.54, "elapsed_time": "1:24:14", "remaining_time": "0:02:07", "throughput": 2407.14, "total_tokens": 12167912}
3742
+ {"current_steps": 18615, "total_steps": 19080, "loss": 0.5648, "lr": 9.07981018283266e-08, "epoch": 9.7562893081761, "percentage": 97.56, "elapsed_time": "1:24:16", "remaining_time": "0:02:06", "throughput": 2407.25, "total_tokens": 12171624}
3743
+ {"current_steps": 18620, "total_steps": 19080, "loss": 0.5095, "lr": 8.886124671928786e-08, "epoch": 9.758909853249476, "percentage": 97.59, "elapsed_time": "1:24:17", "remaining_time": "0:02:04", "throughput": 2407.3, "total_tokens": 12174632}
3744
+ {"current_steps": 18625, "total_steps": 19080, "loss": 0.6197, "lr": 8.694523611326444e-08, "epoch": 9.76153039832285, "percentage": 97.62, "elapsed_time": "1:24:18", "remaining_time": "0:02:03", "throughput": 2407.36, "total_tokens": 12177896}
3745
+ {"current_steps": 18630, "total_steps": 19080, "loss": 0.5257, "lr": 8.505007161348222e-08, "epoch": 9.764150943396226, "percentage": 97.64, "elapsed_time": "1:24:19", "remaining_time": "0:02:02", "throughput": 2407.42, "total_tokens": 12181256}
3746
+ {"current_steps": 18635, "total_steps": 19080, "loss": 0.5319, "lr": 8.31757548057338e-08, "epoch": 9.766771488469601, "percentage": 97.67, "elapsed_time": "1:24:20", "remaining_time": "0:02:00", "throughput": 2407.41, "total_tokens": 12183848}
3747
+ {"current_steps": 18640, "total_steps": 19080, "loss": 0.4589, "lr": 8.132228725835634e-08, "epoch": 9.769392033542976, "percentage": 97.69, "elapsed_time": "1:24:22", "remaining_time": "0:01:59", "throughput": 2407.49, "total_tokens": 12187208}
3748
+ {"current_steps": 18645, "total_steps": 19080, "loss": 0.4996, "lr": 7.948967052225087e-08, "epoch": 9.772012578616351, "percentage": 97.72, "elapsed_time": "1:24:23", "remaining_time": "0:01:58", "throughput": 2407.57, "total_tokens": 12190568}
3749
+ {"current_steps": 18650, "total_steps": 19080, "loss": 0.3631, "lr": 7.767790613086301e-08, "epoch": 9.774633123689728, "percentage": 97.75, "elapsed_time": "1:24:24", "remaining_time": "0:01:56", "throughput": 2407.57, "total_tokens": 12193224}
3750
+ {"current_steps": 18655, "total_steps": 19080, "loss": 0.4754, "lr": 7.588699560019952e-08, "epoch": 9.777253668763104, "percentage": 97.77, "elapsed_time": "1:24:25", "remaining_time": "0:01:55", "throughput": 2407.57, "total_tokens": 12195816}
3751
+ {"current_steps": 18660, "total_steps": 19080, "loss": 0.4728, "lr": 7.411694042881168e-08, "epoch": 9.779874213836479, "percentage": 97.8, "elapsed_time": "1:24:26", "remaining_time": "0:01:54", "throughput": 2407.6, "total_tokens": 12198856}
3752
+ {"current_steps": 18665, "total_steps": 19080, "loss": 0.5516, "lr": 7.23677420977953e-08, "epoch": 9.782494758909854, "percentage": 97.82, "elapsed_time": "1:24:28", "remaining_time": "0:01:52", "throughput": 2407.65, "total_tokens": 12201992}
3753
+ {"current_steps": 18670, "total_steps": 19080, "loss": 0.4992, "lr": 7.063940207080733e-08, "epoch": 9.785115303983229, "percentage": 97.85, "elapsed_time": "1:24:29", "remaining_time": "0:01:51", "throughput": 2407.75, "total_tokens": 12205608}
3754
+ {"current_steps": 18675, "total_steps": 19080, "loss": 0.5809, "lr": 6.893192179403817e-08, "epoch": 9.787735849056604, "percentage": 97.88, "elapsed_time": "1:24:30", "remaining_time": "0:01:49", "throughput": 2407.87, "total_tokens": 12209352}
3755
+ {"current_steps": 18680, "total_steps": 19080, "loss": 0.4863, "lr": 6.724530269623108e-08, "epoch": 9.79035639412998, "percentage": 97.9, "elapsed_time": "1:24:32", "remaining_time": "0:01:48", "throughput": 2408.04, "total_tokens": 12213768}
3756
+ {"current_steps": 18685, "total_steps": 19080, "loss": 0.4849, "lr": 6.557954618867102e-08, "epoch": 9.792976939203355, "percentage": 97.93, "elapsed_time": "1:24:33", "remaining_time": "0:01:47", "throughput": 2408.08, "total_tokens": 12216776}
3757
+ {"current_steps": 18690, "total_steps": 19080, "loss": 0.4061, "lr": 6.393465366519024e-08, "epoch": 9.79559748427673, "percentage": 97.96, "elapsed_time": "1:24:34", "remaining_time": "0:01:45", "throughput": 2408.25, "total_tokens": 12221000}
3758
+ {"current_steps": 18695, "total_steps": 19080, "loss": 0.4713, "lr": 6.231062650215724e-08, "epoch": 9.798218029350105, "percentage": 97.98, "elapsed_time": "1:24:35", "remaining_time": "0:01:44", "throughput": 2408.3, "total_tokens": 12224200}
3759
+ {"current_steps": 18700, "total_steps": 19080, "loss": 0.4868, "lr": 6.070746605848221e-08, "epoch": 9.80083857442348, "percentage": 98.01, "elapsed_time": "1:24:36", "remaining_time": "0:01:43", "throughput": 2408.3, "total_tokens": 12226920}
3760
+ {"current_steps": 18705, "total_steps": 19080, "loss": 0.5285, "lr": 5.912517367561987e-08, "epoch": 9.803459119496855, "percentage": 98.03, "elapsed_time": "1:24:38", "remaining_time": "0:01:41", "throughput": 2408.33, "total_tokens": 12229960}
3761
+ {"current_steps": 18710, "total_steps": 19080, "loss": 0.4468, "lr": 5.756375067755837e-08, "epoch": 9.80607966457023, "percentage": 98.06, "elapsed_time": "1:24:39", "remaining_time": "0:01:40", "throughput": 2408.27, "total_tokens": 12232040}
3762
+ {"current_steps": 18715, "total_steps": 19080, "loss": 0.4096, "lr": 5.602319837082481e-08, "epoch": 9.808700209643606, "percentage": 98.09, "elapsed_time": "1:24:40", "remaining_time": "0:01:39", "throughput": 2408.24, "total_tokens": 12234472}
3763
+ {"current_steps": 18720, "total_steps": 19080, "loss": 0.3662, "lr": 5.450351804448528e-08, "epoch": 9.81132075471698, "percentage": 98.11, "elapsed_time": "1:24:41", "remaining_time": "0:01:37", "throughput": 2408.27, "total_tokens": 12237448}
3764
+ {"current_steps": 18725, "total_steps": 19080, "loss": 0.5035, "lr": 5.3004710970133705e-08, "epoch": 9.813941299790356, "percentage": 98.14, "elapsed_time": "1:24:42", "remaining_time": "0:01:36", "throughput": 2408.27, "total_tokens": 12240264}
3765
+ {"current_steps": 18730, "total_steps": 19080, "loss": 0.4455, "lr": 5.1526778401911334e-08, "epoch": 9.816561844863731, "percentage": 98.17, "elapsed_time": "1:24:43", "remaining_time": "0:01:34", "throughput": 2408.29, "total_tokens": 12243176}
3766
+ {"current_steps": 18735, "total_steps": 19080, "loss": 0.5173, "lr": 5.0069721576476156e-08, "epoch": 9.819182389937106, "percentage": 98.19, "elapsed_time": "1:24:44", "remaining_time": "0:01:33", "throughput": 2408.28, "total_tokens": 12245864}
3767
+ {"current_steps": 18740, "total_steps": 19080, "loss": 0.4505, "lr": 4.863354171303347e-08, "epoch": 9.821802935010481, "percentage": 98.22, "elapsed_time": "1:24:46", "remaining_time": "0:01:32", "throughput": 2408.28, "total_tokens": 12248712}
3768
+ {"current_steps": 18745, "total_steps": 19080, "loss": 0.5472, "lr": 4.72182400133081e-08, "epoch": 9.824423480083858, "percentage": 98.24, "elapsed_time": "1:24:47", "remaining_time": "0:01:30", "throughput": 2408.25, "total_tokens": 12251144}
3769
+ {"current_steps": 18750, "total_steps": 19080, "loss": 0.4718, "lr": 4.582381766156385e-08, "epoch": 9.827044025157234, "percentage": 98.27, "elapsed_time": "1:24:48", "remaining_time": "0:01:29", "throughput": 2408.41, "total_tokens": 12255336}
3770
+ {"current_steps": 18755, "total_steps": 19080, "loss": 0.5339, "lr": 4.445027582458683e-08, "epoch": 9.829664570230609, "percentage": 98.3, "elapsed_time": "1:24:49", "remaining_time": "0:01:28", "throughput": 2408.36, "total_tokens": 12257672}
3771
+ {"current_steps": 18760, "total_steps": 19080, "loss": 0.4722, "lr": 4.309761565169379e-08, "epoch": 9.832285115303984, "percentage": 98.32, "elapsed_time": "1:24:50", "remaining_time": "0:01:26", "throughput": 2408.43, "total_tokens": 12261032}
3772
+ {"current_steps": 18765, "total_steps": 19080, "loss": 0.4682, "lr": 4.1765838274732125e-08, "epoch": 9.834905660377359, "percentage": 98.35, "elapsed_time": "1:24:52", "remaining_time": "0:01:25", "throughput": 2408.5, "total_tokens": 12264488}
3773
+ {"current_steps": 18770, "total_steps": 19080, "loss": 0.493, "lr": 4.045494480807155e-08, "epoch": 9.837526205450734, "percentage": 98.38, "elapsed_time": "1:24:53", "remaining_time": "0:01:24", "throughput": 2408.53, "total_tokens": 12267432}
3774
+ {"current_steps": 18775, "total_steps": 19080, "loss": 0.3779, "lr": 3.916493634860407e-08, "epoch": 9.84014675052411, "percentage": 98.4, "elapsed_time": "1:24:54", "remaining_time": "0:01:22", "throughput": 2408.6, "total_tokens": 12270888}
3775
+ {"current_steps": 18780, "total_steps": 19080, "loss": 0.4699, "lr": 3.789581397575515e-08, "epoch": 9.842767295597485, "percentage": 98.43, "elapsed_time": "1:24:55", "remaining_time": "0:01:21", "throughput": 2408.63, "total_tokens": 12273896}
3776
+ {"current_steps": 18785, "total_steps": 19080, "loss": 0.5473, "lr": 3.664757875146418e-08, "epoch": 9.84538784067086, "percentage": 98.45, "elapsed_time": "1:24:56", "remaining_time": "0:01:20", "throughput": 2408.61, "total_tokens": 12276328}
3777
+ {"current_steps": 18790, "total_steps": 19080, "loss": 0.4228, "lr": 3.5420231720198485e-08, "epoch": 9.848008385744235, "percentage": 98.48, "elapsed_time": "1:24:57", "remaining_time": "0:01:18", "throughput": 2408.61, "total_tokens": 12279016}
3778
+ {"current_steps": 18795, "total_steps": 19080, "loss": 0.4009, "lr": 3.421377390894764e-08, "epoch": 9.85062893081761, "percentage": 98.51, "elapsed_time": "1:24:59", "remaining_time": "0:01:17", "throughput": 2408.59, "total_tokens": 12281512}
3779
+ {"current_steps": 18800, "total_steps": 19080, "loss": 0.5596, "lr": 3.3028206327218035e-08, "epoch": 9.853249475890985, "percentage": 98.53, "elapsed_time": "1:25:00", "remaining_time": "0:01:15", "throughput": 2408.69, "total_tokens": 12285160}
3780
+ {"current_steps": 18805, "total_steps": 19080, "loss": 0.4887, "lr": 3.1863529967041117e-08, "epoch": 9.85587002096436, "percentage": 98.56, "elapsed_time": "1:25:01", "remaining_time": "0:01:14", "throughput": 2408.78, "total_tokens": 12288616}
3781
+ {"current_steps": 18810, "total_steps": 19080, "loss": 0.57, "lr": 3.071974580296233e-08, "epoch": 9.858490566037736, "percentage": 98.58, "elapsed_time": "1:25:02", "remaining_time": "0:01:13", "throughput": 2408.93, "total_tokens": 12292680}
3782
+ {"current_steps": 18815, "total_steps": 19080, "loss": 0.424, "lr": 2.9596854792052207e-08, "epoch": 9.86111111111111, "percentage": 98.61, "elapsed_time": "1:25:04", "remaining_time": "0:01:11", "throughput": 2409.12, "total_tokens": 12297160}
3783
+ {"current_steps": 18820, "total_steps": 19080, "loss": 0.648, "lr": 2.8494857873889724e-08, "epoch": 9.863731656184486, "percentage": 98.64, "elapsed_time": "1:25:05", "remaining_time": "0:01:10", "throughput": 2409.2, "total_tokens": 12300520}
3784
+ {"current_steps": 18825, "total_steps": 19080, "loss": 0.5104, "lr": 2.741375597057616e-08, "epoch": 9.866352201257861, "percentage": 98.66, "elapsed_time": "1:25:06", "remaining_time": "0:01:09", "throughput": 2409.22, "total_tokens": 12303496}
3785
+ {"current_steps": 18830, "total_steps": 19080, "loss": 0.558, "lr": 2.6353549986729566e-08, "epoch": 9.868972746331236, "percentage": 98.69, "elapsed_time": "1:25:08", "remaining_time": "0:01:07", "throughput": 2409.27, "total_tokens": 12306568}
3786
+ {"current_steps": 18835, "total_steps": 19080, "loss": 0.5724, "lr": 2.531424080948197e-08, "epoch": 9.871593291404611, "percentage": 98.72, "elapsed_time": "1:25:09", "remaining_time": "0:01:06", "throughput": 2409.25, "total_tokens": 12309160}
3787
+ {"current_steps": 18840, "total_steps": 19080, "loss": 0.4531, "lr": 2.4295829308482176e-08, "epoch": 9.874213836477988, "percentage": 98.74, "elapsed_time": "1:25:10", "remaining_time": "0:01:05", "throughput": 2409.35, "total_tokens": 12312776}
3788
+ {"current_steps": 18845, "total_steps": 19080, "loss": 0.4212, "lr": 2.329831633588464e-08, "epoch": 9.876834381551364, "percentage": 98.77, "elapsed_time": "1:25:11", "remaining_time": "0:01:03", "throughput": 2409.41, "total_tokens": 12316104}
3789
+ {"current_steps": 18850, "total_steps": 19080, "loss": 0.5838, "lr": 2.232170272636891e-08, "epoch": 9.879454926624739, "percentage": 98.79, "elapsed_time": "1:25:12", "remaining_time": "0:01:02", "throughput": 2409.38, "total_tokens": 12318568}
3790
+ {"current_steps": 18855, "total_steps": 19080, "loss": 0.538, "lr": 2.136598929711464e-08, "epoch": 9.882075471698114, "percentage": 98.82, "elapsed_time": "1:25:13", "remaining_time": "0:01:01", "throughput": 2409.43, "total_tokens": 12321736}
3791
+ {"current_steps": 18860, "total_steps": 19080, "loss": 0.51, "lr": 2.0431176847823807e-08, "epoch": 9.884696016771489, "percentage": 98.85, "elapsed_time": "1:25:15", "remaining_time": "0:00:59", "throughput": 2409.5, "total_tokens": 12325032}
3792
+ {"current_steps": 18865, "total_steps": 19080, "loss": 0.5172, "lr": 1.9517266160704038e-08, "epoch": 9.887316561844864, "percentage": 98.87, "elapsed_time": "1:25:16", "remaining_time": "0:00:58", "throughput": 2409.65, "total_tokens": 12329160}
3793
+ {"current_steps": 18870, "total_steps": 19080, "loss": 0.7113, "lr": 1.8624258000471405e-08, "epoch": 9.88993710691824, "percentage": 98.9, "elapsed_time": "1:25:17", "remaining_time": "0:00:56", "throughput": 2409.7, "total_tokens": 12332392}
3794
+ {"current_steps": 18875, "total_steps": 19080, "loss": 0.4527, "lr": 1.7752153114358737e-08, "epoch": 9.892557651991615, "percentage": 98.93, "elapsed_time": "1:25:19", "remaining_time": "0:00:55", "throughput": 2409.86, "total_tokens": 12336456}
3795
+ {"current_steps": 18880, "total_steps": 19080, "loss": 0.4309, "lr": 1.6900952232098977e-08, "epoch": 9.89517819706499, "percentage": 98.95, "elapsed_time": "1:25:20", "remaining_time": "0:00:54", "throughput": 2409.85, "total_tokens": 12339080}
3796
+ {"current_steps": 18885, "total_steps": 19080, "loss": 0.4131, "lr": 1.6070656065939048e-08, "epoch": 9.897798742138365, "percentage": 98.98, "elapsed_time": "1:25:21", "remaining_time": "0:00:52", "throughput": 2409.85, "total_tokens": 12341768}
3797
+ {"current_steps": 18890, "total_steps": 19080, "loss": 0.414, "lr": 1.526126531063432e-08, "epoch": 9.90041928721174, "percentage": 99.0, "elapsed_time": "1:25:22", "remaining_time": "0:00:51", "throughput": 2409.9, "total_tokens": 12344936}
3798
+ {"current_steps": 18895, "total_steps": 19080, "loss": 0.4877, "lr": 1.4472780643445817e-08, "epoch": 9.903039832285115, "percentage": 99.03, "elapsed_time": "1:25:24", "remaining_time": "0:00:50", "throughput": 2410.08, "total_tokens": 12349416}
3799
+ {"current_steps": 18900, "total_steps": 19080, "loss": 0.5503, "lr": 1.3705202724142996e-08, "epoch": 9.90566037735849, "percentage": 99.06, "elapsed_time": "1:25:25", "remaining_time": "0:00:48", "throughput": 2410.11, "total_tokens": 12352360}
3800
+ {"current_steps": 18905, "total_steps": 19080, "loss": 0.5392, "lr": 1.2958532194995432e-08, "epoch": 9.908280922431866, "percentage": 99.08, "elapsed_time": "1:25:26", "remaining_time": "0:00:47", "throughput": 2410.17, "total_tokens": 12355688}
3801
+ {"current_steps": 18910, "total_steps": 19080, "loss": 0.4929, "lr": 1.2232769680789457e-08, "epoch": 9.91090146750524, "percentage": 99.11, "elapsed_time": "1:25:27", "remaining_time": "0:00:46", "throughput": 2410.3, "total_tokens": 12359560}
3802
+ {"current_steps": 18915, "total_steps": 19080, "loss": 0.5336, "lr": 1.152791578880319e-08, "epoch": 9.913522012578616, "percentage": 99.14, "elapsed_time": "1:25:29", "remaining_time": "0:00:44", "throughput": 2410.44, "total_tokens": 12363656}
3803
+ {"current_steps": 18920, "total_steps": 19080, "loss": 0.4124, "lr": 1.0843971108828732e-08, "epoch": 9.916142557651991, "percentage": 99.16, "elapsed_time": "1:25:30", "remaining_time": "0:00:43", "throughput": 2410.59, "total_tokens": 12367688}
3804
+ {"current_steps": 18925, "total_steps": 19080, "loss": 0.4846, "lr": 1.018093621316385e-08, "epoch": 9.918763102725366, "percentage": 99.19, "elapsed_time": "1:25:31", "remaining_time": "0:00:42", "throughput": 2410.7, "total_tokens": 12371400}
3805
+ {"current_steps": 18930, "total_steps": 19080, "loss": 0.4637, "lr": 9.53881165659809e-09, "epoch": 9.921383647798741, "percentage": 99.21, "elapsed_time": "1:25:32", "remaining_time": "0:00:40", "throughput": 2410.71, "total_tokens": 12374152}
3806
+ {"current_steps": 18935, "total_steps": 19080, "loss": 0.3924, "lr": 8.91759797644054e-09, "epoch": 9.924004192872118, "percentage": 99.24, "elapsed_time": "1:25:34", "remaining_time": "0:00:39", "throughput": 2410.89, "total_tokens": 12378472}
3807
+ {"current_steps": 18940, "total_steps": 19080, "loss": 0.5325, "lr": 8.317295692486516e-09, "epoch": 9.926624737945493, "percentage": 99.27, "elapsed_time": "1:25:35", "remaining_time": "0:00:37", "throughput": 2410.92, "total_tokens": 12381480}
3808
+ {"current_steps": 18945, "total_steps": 19080, "loss": 0.4508, "lr": 7.737905307045323e-09, "epoch": 9.929245283018869, "percentage": 99.29, "elapsed_time": "1:25:36", "remaining_time": "0:00:36", "throughput": 2410.97, "total_tokens": 12384648}
3809
+ {"current_steps": 18950, "total_steps": 19080, "loss": 0.391, "lr": 7.179427304926378e-09, "epoch": 9.931865828092244, "percentage": 99.32, "elapsed_time": "1:25:37", "remaining_time": "0:00:35", "throughput": 2410.98, "total_tokens": 12387432}
3810
+ {"current_steps": 18955, "total_steps": 19080, "loss": 0.4593, "lr": 6.641862153433653e-09, "epoch": 9.934486373165619, "percentage": 99.34, "elapsed_time": "1:25:39", "remaining_time": "0:00:33", "throughput": 2411.07, "total_tokens": 12390984}
3811
+ {"current_steps": 18960, "total_steps": 19080, "loss": 0.4211, "lr": 6.125210302382333e-09, "epoch": 9.937106918238994, "percentage": 99.37, "elapsed_time": "1:25:40", "remaining_time": "0:00:32", "throughput": 2411.19, "total_tokens": 12394760}
3812
+ {"current_steps": 18965, "total_steps": 19080, "loss": 0.5481, "lr": 5.629472184079387e-09, "epoch": 9.93972746331237, "percentage": 99.4, "elapsed_time": "1:25:41", "remaining_time": "0:00:31", "throughput": 2411.22, "total_tokens": 12397768}
3813
+ {"current_steps": 18970, "total_steps": 19080, "loss": 0.4215, "lr": 5.154648213334668e-09, "epoch": 9.942348008385745, "percentage": 99.42, "elapsed_time": "1:25:42", "remaining_time": "0:00:29", "throughput": 2411.27, "total_tokens": 12400968}
3814
+ {"current_steps": 18975, "total_steps": 19080, "loss": 0.5535, "lr": 4.700738787466463e-09, "epoch": 9.94496855345912, "percentage": 99.45, "elapsed_time": "1:25:44", "remaining_time": "0:00:28", "throughput": 2411.55, "total_tokens": 12406664}
3815
+ {"current_steps": 18980, "total_steps": 19080, "loss": 0.4668, "lr": 4.26774428627652e-09, "epoch": 9.947589098532495, "percentage": 99.48, "elapsed_time": "1:25:45", "remaining_time": "0:00:27", "throughput": 2411.56, "total_tokens": 12409448}
3816
+ {"current_steps": 18985, "total_steps": 19080, "loss": 0.3727, "lr": 3.855665072080572e-09, "epoch": 9.95020964360587, "percentage": 99.5, "elapsed_time": "1:25:47", "remaining_time": "0:00:25", "throughput": 2411.62, "total_tokens": 12412744}
3817
+ {"current_steps": 18990, "total_steps": 19080, "loss": 0.656, "lr": 3.464501489683358e-09, "epoch": 9.952830188679245, "percentage": 99.53, "elapsed_time": "1:25:48", "remaining_time": "0:00:24", "throughput": 2411.65, "total_tokens": 12415656}
3818
+ {"current_steps": 18995, "total_steps": 19080, "loss": 0.5815, "lr": 3.094253866398056e-09, "epoch": 9.95545073375262, "percentage": 99.55, "elapsed_time": "1:25:49", "remaining_time": "0:00:23", "throughput": 2411.66, "total_tokens": 12418472}
3819
+ {"current_steps": 19000, "total_steps": 19080, "loss": 0.4373, "lr": 2.7449225120268484e-09, "epoch": 9.958071278825996, "percentage": 99.58, "elapsed_time": "1:25:50", "remaining_time": "0:00:21", "throughput": 2411.72, "total_tokens": 12421768}
3820
+ {"current_steps": 19005, "total_steps": 19080, "loss": 0.4565, "lr": 2.416507718877581e-09, "epoch": 9.96069182389937, "percentage": 99.61, "elapsed_time": "1:25:51", "remaining_time": "0:00:20", "throughput": 2411.86, "total_tokens": 12425800}
3821
+ {"current_steps": 19010, "total_steps": 19080, "loss": 0.5965, "lr": 2.109009761747105e-09, "epoch": 9.963312368972746, "percentage": 99.63, "elapsed_time": "1:25:53", "remaining_time": "0:00:18", "throughput": 2411.94, "total_tokens": 12429288}
3822
+ {"current_steps": 19015, "total_steps": 19080, "loss": 0.4221, "lr": 1.8224288979434844e-09, "epoch": 9.965932914046121, "percentage": 99.66, "elapsed_time": "1:25:54", "remaining_time": "0:00:17", "throughput": 2412.06, "total_tokens": 12433160}
3823
+ {"current_steps": 19020, "total_steps": 19080, "loss": 0.4764, "lr": 1.5567653672554638e-09, "epoch": 9.968553459119496, "percentage": 99.69, "elapsed_time": "1:25:55", "remaining_time": "0:00:16", "throughput": 2412.07, "total_tokens": 12435944}
3824
+ {"current_steps": 19025, "total_steps": 19080, "loss": 0.4212, "lr": 1.3120193919857748e-09, "epoch": 9.971174004192871, "percentage": 99.71, "elapsed_time": "1:25:56", "remaining_time": "0:00:14", "throughput": 2412.01, "total_tokens": 12438216}
3825
+ {"current_steps": 19030, "total_steps": 19080, "loss": 0.4396, "lr": 1.0881911769261565e-09, "epoch": 9.973794549266248, "percentage": 99.74, "elapsed_time": "1:25:57", "remaining_time": "0:00:13", "throughput": 2412.01, "total_tokens": 12440904}
3826
+ {"current_steps": 19035, "total_steps": 19080, "loss": 0.5479, "lr": 8.852809093601311e-10, "epoch": 9.976415094339622, "percentage": 99.76, "elapsed_time": "1:25:59", "remaining_time": "0:00:12", "throughput": 2412.02, "total_tokens": 12443752}
3827
+ {"current_steps": 19040, "total_steps": 19080, "loss": 0.708, "lr": 7.03288759076881e-10, "epoch": 9.979035639412999, "percentage": 99.79, "elapsed_time": "1:26:00", "remaining_time": "0:00:10", "throughput": 2411.99, "total_tokens": 12446152}
3828
+ {"current_steps": 19045, "total_steps": 19080, "loss": 0.496, "lr": 5.422148783629233e-10, "epoch": 9.981656184486374, "percentage": 99.82, "elapsed_time": "1:26:01", "remaining_time": "0:00:09", "throughput": 2412.02, "total_tokens": 12449160}
3829
+ {"current_steps": 19050, "total_steps": 19080, "loss": 0.5216, "lr": 4.0205940199100623e-10, "epoch": 9.984276729559749, "percentage": 99.84, "elapsed_time": "1:26:02", "remaining_time": "0:00:08", "throughput": 2412.15, "total_tokens": 12453064}
3830
+ {"current_steps": 19055, "total_steps": 19080, "loss": 0.5004, "lr": 2.828224472395391e-10, "epoch": 9.986897274633124, "percentage": 99.87, "elapsed_time": "1:26:03", "remaining_time": "0:00:06", "throughput": 2412.17, "total_tokens": 12455944}
3831
+ {"current_steps": 19060, "total_steps": 19080, "loss": 0.5452, "lr": 1.8450411388426515e-10, "epoch": 9.9895178197065, "percentage": 99.9, "elapsed_time": "1:26:05", "remaining_time": "0:00:05", "throughput": 2412.27, "total_tokens": 12459528}
3832
+ {"current_steps": 19065, "total_steps": 19080, "loss": 0.5028, "lr": 1.0710448418715935e-10, "epoch": 9.992138364779874, "percentage": 99.92, "elapsed_time": "1:26:06", "remaining_time": "0:00:04", "throughput": 2412.35, "total_tokens": 12463048}
3833
+ {"current_steps": 19070, "total_steps": 19080, "loss": 0.4727, "lr": 5.062362291585743e-11, "epoch": 9.99475890985325, "percentage": 99.95, "elapsed_time": "1:26:07", "remaining_time": "0:00:02", "throughput": 2412.51, "total_tokens": 12467240}
3834
+ {"current_steps": 19075, "total_steps": 19080, "loss": 0.683, "lr": 1.5061577329777976e-11, "epoch": 9.997379454926625, "percentage": 99.97, "elapsed_time": "1:26:08", "remaining_time": "0:00:01", "throughput": 2412.53, "total_tokens": 12470216}
3835
+ {"current_steps": 19080, "total_steps": 19080, "loss": 0.672, "lr": 4.183771884491705e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:26:10", "remaining_time": "0:00:00", "throughput": 2412.52, "total_tokens": 12472912}
3836
+ {"current_steps": 19080, "total_steps": 19080, "eval_loss": 0.49317190051078796, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:26:34", "remaining_time": "0:00:00", "throughput": 2401.24, "total_tokens": 12472912}
3837
+ {"current_steps": 19080, "total_steps": 19080, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:26:35", "remaining_time": "0:00:00", "throughput": 2400.69, "total_tokens": 12472912}