rbelanec commited on
Commit
290c8d5
·
verified ·
1 Parent(s): db22241

Training in progress, step 18126

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +191 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c78c8dbd72b8b2e8d745e476a0682571b29037cc5d3d921a9c4a136feecceacd
3
  size 26214528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c2691d2278d3d8d70be3d74d21d5346cd2e1dc3ccd52b8ca3dc30ee249fdb3
3
  size 26214528
trainer_log.jsonl CHANGED
@@ -3453,3 +3453,194 @@
3453
  {"current_steps": 17175, "total_steps": 19080, "loss": 5.4825, "lr": 1.5045574483889463e-06, "epoch": 9.001572327044025, "percentage": 90.02, "elapsed_time": "0:52:26", "remaining_time": "0:05:48", "throughput": 3568.22, "total_tokens": 11227112}
3454
  {"current_steps": 17180, "total_steps": 19080, "loss": 4.8449, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "0:52:27", "remaining_time": "0:05:48", "throughput": 3568.25, "total_tokens": 11229544}
3455
  {"current_steps": 17185, "total_steps": 19080, "loss": 5.1874, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "0:52:28", "remaining_time": "0:05:47", "throughput": 3568.41, "total_tokens": 11233640}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3453
  {"current_steps": 17175, "total_steps": 19080, "loss": 5.4825, "lr": 1.5045574483889463e-06, "epoch": 9.001572327044025, "percentage": 90.02, "elapsed_time": "0:52:26", "remaining_time": "0:05:48", "throughput": 3568.22, "total_tokens": 11227112}
3454
  {"current_steps": 17180, "total_steps": 19080, "loss": 4.8449, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "0:52:27", "remaining_time": "0:05:48", "throughput": 3568.25, "total_tokens": 11229544}
3455
  {"current_steps": 17185, "total_steps": 19080, "loss": 5.1874, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "0:52:28", "remaining_time": "0:05:47", "throughput": 3568.41, "total_tokens": 11233640}
3456
+ {"current_steps": 17190, "total_steps": 19080, "loss": 4.7053, "lr": 1.481205003070424e-06, "epoch": 9.00943396226415, "percentage": 90.09, "elapsed_time": "0:52:28", "remaining_time": "0:05:46", "throughput": 3568.47, "total_tokens": 11236552}
3457
+ {"current_steps": 17195, "total_steps": 19080, "loss": 5.2198, "lr": 1.4734602048695312e-06, "epoch": 9.012054507337526, "percentage": 90.12, "elapsed_time": "0:52:29", "remaining_time": "0:05:45", "throughput": 3568.61, "total_tokens": 11240008}
3458
+ {"current_steps": 17200, "total_steps": 19080, "loss": 5.1301, "lr": 1.465735092602491e-06, "epoch": 9.014675052410901, "percentage": 90.15, "elapsed_time": "0:52:30", "remaining_time": "0:05:44", "throughput": 3568.7, "total_tokens": 11243176}
3459
+ {"current_steps": 17205, "total_steps": 19080, "loss": 4.8785, "lr": 1.4580296727333187e-06, "epoch": 9.017295597484276, "percentage": 90.17, "elapsed_time": "0:52:31", "remaining_time": "0:05:43", "throughput": 3568.83, "total_tokens": 11246568}
3460
+ {"current_steps": 17210, "total_steps": 19080, "loss": 4.8614, "lr": 1.450343951709568e-06, "epoch": 9.019916142557651, "percentage": 90.2, "elapsed_time": "0:52:32", "remaining_time": "0:05:42", "throughput": 3569.05, "total_tokens": 11251080}
3461
+ {"current_steps": 17215, "total_steps": 19080, "loss": 4.8627, "lr": 1.4426779359622916e-06, "epoch": 9.022536687631026, "percentage": 90.23, "elapsed_time": "0:52:33", "remaining_time": "0:05:41", "throughput": 3569.06, "total_tokens": 11253256}
3462
+ {"current_steps": 17220, "total_steps": 19080, "loss": 4.9337, "lr": 1.4350316319060585e-06, "epoch": 9.025157232704403, "percentage": 90.25, "elapsed_time": "0:52:33", "remaining_time": "0:05:40", "throughput": 3569.2, "total_tokens": 11256936}
3463
+ {"current_steps": 17225, "total_steps": 19080, "loss": 4.2928, "lr": 1.4274050459389594e-06, "epoch": 9.027777777777779, "percentage": 90.28, "elapsed_time": "0:52:34", "remaining_time": "0:05:39", "throughput": 3569.26, "total_tokens": 11259784}
3464
+ {"current_steps": 17230, "total_steps": 19080, "loss": 4.5473, "lr": 1.4197981844425583e-06, "epoch": 9.030398322851154, "percentage": 90.3, "elapsed_time": "0:52:35", "remaining_time": "0:05:38", "throughput": 3569.35, "total_tokens": 11262728}
3465
+ {"current_steps": 17235, "total_steps": 19080, "loss": 4.9116, "lr": 1.4122110537819365e-06, "epoch": 9.033018867924529, "percentage": 90.33, "elapsed_time": "0:52:36", "remaining_time": "0:05:37", "throughput": 3569.42, "total_tokens": 11265640}
3466
+ {"current_steps": 17240, "total_steps": 19080, "loss": 4.6044, "lr": 1.4046436603056601e-06, "epoch": 9.035639412997904, "percentage": 90.36, "elapsed_time": "0:52:37", "remaining_time": "0:05:36", "throughput": 3569.61, "total_tokens": 11270344}
3467
+ {"current_steps": 17245, "total_steps": 19080, "loss": 4.3928, "lr": 1.397096010345772e-06, "epoch": 9.03825995807128, "percentage": 90.38, "elapsed_time": "0:52:37", "remaining_time": "0:05:36", "throughput": 3569.61, "total_tokens": 11272584}
3468
+ {"current_steps": 17250, "total_steps": 19080, "loss": 4.9733, "lr": 1.3895681102178094e-06, "epoch": 9.040880503144654, "percentage": 90.41, "elapsed_time": "0:52:38", "remaining_time": "0:05:35", "throughput": 3569.78, "total_tokens": 11276872}
3469
+ {"current_steps": 17255, "total_steps": 19080, "loss": 4.564, "lr": 1.3820599662207695e-06, "epoch": 9.04350104821803, "percentage": 90.44, "elapsed_time": "0:52:39", "remaining_time": "0:05:34", "throughput": 3569.84, "total_tokens": 11279688}
3470
+ {"current_steps": 17260, "total_steps": 19080, "loss": 4.4587, "lr": 1.3745715846371244e-06, "epoch": 9.046121593291405, "percentage": 90.46, "elapsed_time": "0:52:40", "remaining_time": "0:05:33", "throughput": 3569.94, "total_tokens": 11282888}
3471
+ {"current_steps": 17265, "total_steps": 19080, "loss": 4.5152, "lr": 1.3671029717328142e-06, "epoch": 9.04874213836478, "percentage": 90.49, "elapsed_time": "0:52:41", "remaining_time": "0:05:32", "throughput": 3570.02, "total_tokens": 11285928}
3472
+ {"current_steps": 17270, "total_steps": 19080, "loss": 4.7721, "lr": 1.3596541337572265e-06, "epoch": 9.051362683438155, "percentage": 90.51, "elapsed_time": "0:52:42", "remaining_time": "0:05:31", "throughput": 3570.14, "total_tokens": 11289288}
3473
+ {"current_steps": 17275, "total_steps": 19080, "loss": 5.4317, "lr": 1.3522250769432115e-06, "epoch": 9.05398322851153, "percentage": 90.54, "elapsed_time": "0:52:43", "remaining_time": "0:05:30", "throughput": 3570.5, "total_tokens": 11295976}
3474
+ {"current_steps": 17280, "total_steps": 19080, "loss": 5.0974, "lr": 1.3448158075070687e-06, "epoch": 9.056603773584905, "percentage": 90.57, "elapsed_time": "0:52:44", "remaining_time": "0:05:29", "throughput": 3570.63, "total_tokens": 11299816}
3475
+ {"current_steps": 17285, "total_steps": 19080, "loss": 5.1409, "lr": 1.337426331648528e-06, "epoch": 9.05922431865828, "percentage": 90.59, "elapsed_time": "0:52:45", "remaining_time": "0:05:28", "throughput": 3570.73, "total_tokens": 11303176}
3476
+ {"current_steps": 17290, "total_steps": 19080, "loss": 4.0771, "lr": 1.3300566555507709e-06, "epoch": 9.061844863731656, "percentage": 90.62, "elapsed_time": "0:52:46", "remaining_time": "0:05:27", "throughput": 3570.85, "total_tokens": 11306376}
3477
+ {"current_steps": 17295, "total_steps": 19080, "loss": 4.7883, "lr": 1.3227067853804065e-06, "epoch": 9.064465408805031, "percentage": 90.64, "elapsed_time": "0:52:47", "remaining_time": "0:05:26", "throughput": 3571.04, "total_tokens": 11310696}
3478
+ {"current_steps": 17300, "total_steps": 19080, "loss": 4.5838, "lr": 1.315376727287465e-06, "epoch": 9.067085953878406, "percentage": 90.67, "elapsed_time": "0:52:48", "remaining_time": "0:05:25", "throughput": 3571.23, "total_tokens": 11314888}
3479
+ {"current_steps": 17305, "total_steps": 19080, "loss": 4.4454, "lr": 1.3080664874054127e-06, "epoch": 9.069706498951781, "percentage": 90.7, "elapsed_time": "0:52:49", "remaining_time": "0:05:25", "throughput": 3571.31, "total_tokens": 11317576}
3480
+ {"current_steps": 17310, "total_steps": 19080, "loss": 5.3457, "lr": 1.3007760718511176e-06, "epoch": 9.072327044025156, "percentage": 90.72, "elapsed_time": "0:52:49", "remaining_time": "0:05:24", "throughput": 3571.41, "total_tokens": 11320840}
3481
+ {"current_steps": 17315, "total_steps": 19080, "loss": 4.1925, "lr": 1.2935054867248692e-06, "epoch": 9.074947589098532, "percentage": 90.75, "elapsed_time": "0:52:50", "remaining_time": "0:05:23", "throughput": 3571.49, "total_tokens": 11324040}
3482
+ {"current_steps": 17320, "total_steps": 19080, "loss": 4.8688, "lr": 1.2862547381103567e-06, "epoch": 9.077568134171909, "percentage": 90.78, "elapsed_time": "0:52:51", "remaining_time": "0:05:22", "throughput": 3571.64, "total_tokens": 11327784}
3483
+ {"current_steps": 17325, "total_steps": 19080, "loss": 5.0509, "lr": 1.2790238320746827e-06, "epoch": 9.080188679245284, "percentage": 90.8, "elapsed_time": "0:52:52", "remaining_time": "0:05:21", "throughput": 3571.77, "total_tokens": 11331304}
3484
+ {"current_steps": 17330, "total_steps": 19080, "loss": 4.9781, "lr": 1.271812774668335e-06, "epoch": 9.082809224318659, "percentage": 90.83, "elapsed_time": "0:52:53", "remaining_time": "0:05:20", "throughput": 3571.86, "total_tokens": 11334312}
3485
+ {"current_steps": 17335, "total_steps": 19080, "loss": 5.2861, "lr": 1.2646215719251952e-06, "epoch": 9.085429769392034, "percentage": 90.85, "elapsed_time": "0:52:54", "remaining_time": "0:05:19", "throughput": 3572.08, "total_tokens": 11339144}
3486
+ {"current_steps": 17340, "total_steps": 19080, "loss": 4.8642, "lr": 1.2574502298625334e-06, "epoch": 9.08805031446541, "percentage": 90.88, "elapsed_time": "0:52:55", "remaining_time": "0:05:18", "throughput": 3572.21, "total_tokens": 11342664}
3487
+ {"current_steps": 17345, "total_steps": 19080, "loss": 4.6836, "lr": 1.250298754481008e-06, "epoch": 9.090670859538784, "percentage": 90.91, "elapsed_time": "0:52:56", "remaining_time": "0:05:17", "throughput": 3572.3, "total_tokens": 11345768}
3488
+ {"current_steps": 17350, "total_steps": 19080, "loss": 5.2562, "lr": 1.2431671517646403e-06, "epoch": 9.09329140461216, "percentage": 90.93, "elapsed_time": "0:52:56", "remaining_time": "0:05:16", "throughput": 3572.47, "total_tokens": 11349448}
3489
+ {"current_steps": 17355, "total_steps": 19080, "loss": 4.4867, "lr": 1.2360554276808295e-06, "epoch": 9.095911949685535, "percentage": 90.96, "elapsed_time": "0:52:57", "remaining_time": "0:05:15", "throughput": 3572.58, "total_tokens": 11352744}
3490
+ {"current_steps": 17360, "total_steps": 19080, "loss": 4.8949, "lr": 1.228963588180343e-06, "epoch": 9.09853249475891, "percentage": 90.99, "elapsed_time": "0:52:58", "remaining_time": "0:05:14", "throughput": 3572.59, "total_tokens": 11354920}
3491
+ {"current_steps": 17365, "total_steps": 19080, "loss": 5.2611, "lr": 1.2218916391973118e-06, "epoch": 9.101153039832285, "percentage": 91.01, "elapsed_time": "0:52:59", "remaining_time": "0:05:14", "throughput": 3572.78, "total_tokens": 11359240}
3492
+ {"current_steps": 17370, "total_steps": 19080, "loss": 4.7021, "lr": 1.2148395866492135e-06, "epoch": 9.10377358490566, "percentage": 91.04, "elapsed_time": "0:53:00", "remaining_time": "0:05:13", "throughput": 3572.85, "total_tokens": 11362056}
3493
+ {"current_steps": 17375, "total_steps": 19080, "loss": 4.271, "lr": 1.2078074364368862e-06, "epoch": 9.106394129979035, "percentage": 91.06, "elapsed_time": "0:53:00", "remaining_time": "0:05:12", "throughput": 3572.89, "total_tokens": 11364648}
3494
+ {"current_steps": 17380, "total_steps": 19080, "loss": 4.9505, "lr": 1.2007951944445122e-06, "epoch": 9.10901467505241, "percentage": 91.09, "elapsed_time": "0:53:01", "remaining_time": "0:05:11", "throughput": 3573.03, "total_tokens": 11368168}
3495
+ {"current_steps": 17385, "total_steps": 19080, "loss": 4.5032, "lr": 1.1938028665396173e-06, "epoch": 9.111635220125786, "percentage": 91.12, "elapsed_time": "0:53:02", "remaining_time": "0:05:10", "throughput": 3573.09, "total_tokens": 11370856}
3496
+ {"current_steps": 17390, "total_steps": 19080, "loss": 4.5134, "lr": 1.1868304585730571e-06, "epoch": 9.114255765199161, "percentage": 91.14, "elapsed_time": "0:53:03", "remaining_time": "0:05:09", "throughput": 3573.18, "total_tokens": 11373768}
3497
+ {"current_steps": 17395, "total_steps": 19080, "loss": 5.4472, "lr": 1.1798779763790346e-06, "epoch": 9.116876310272536, "percentage": 91.17, "elapsed_time": "0:53:03", "remaining_time": "0:05:08", "throughput": 3573.31, "total_tokens": 11377192}
3498
+ {"current_steps": 17400, "total_steps": 19080, "loss": 4.7259, "lr": 1.1729454257750544e-06, "epoch": 9.119496855345911, "percentage": 91.19, "elapsed_time": "0:53:04", "remaining_time": "0:05:07", "throughput": 3573.37, "total_tokens": 11379912}
3499
+ {"current_steps": 17405, "total_steps": 19080, "loss": 5.1425, "lr": 1.1660328125619652e-06, "epoch": 9.122117400419286, "percentage": 91.22, "elapsed_time": "0:53:05", "remaining_time": "0:05:06", "throughput": 3573.51, "total_tokens": 11383496}
3500
+ {"current_steps": 17410, "total_steps": 19080, "loss": 5.006, "lr": 1.1591401425239318e-06, "epoch": 9.124737945492662, "percentage": 91.25, "elapsed_time": "0:53:06", "remaining_time": "0:05:05", "throughput": 3573.57, "total_tokens": 11386504}
3501
+ {"current_steps": 17415, "total_steps": 19080, "loss": 4.7213, "lr": 1.1522674214284158e-06, "epoch": 9.127358490566039, "percentage": 91.27, "elapsed_time": "0:53:06", "remaining_time": "0:05:04", "throughput": 3573.59, "total_tokens": 11388776}
3502
+ {"current_steps": 17420, "total_steps": 19080, "loss": 5.4081, "lr": 1.145414655026203e-06, "epoch": 9.129979035639414, "percentage": 91.3, "elapsed_time": "0:53:07", "remaining_time": "0:05:03", "throughput": 3573.69, "total_tokens": 11391976}
3503
+ {"current_steps": 17425, "total_steps": 19080, "loss": 4.7462, "lr": 1.1385818490513733e-06, "epoch": 9.132599580712789, "percentage": 91.33, "elapsed_time": "0:53:08", "remaining_time": "0:05:02", "throughput": 3573.77, "total_tokens": 11394856}
3504
+ {"current_steps": 17430, "total_steps": 19080, "loss": 4.6352, "lr": 1.1317690092213007e-06, "epoch": 9.135220125786164, "percentage": 91.35, "elapsed_time": "0:53:09", "remaining_time": "0:05:01", "throughput": 3573.87, "total_tokens": 11397896}
3505
+ {"current_steps": 17435, "total_steps": 19080, "loss": 4.9955, "lr": 1.124976141236675e-06, "epoch": 9.13784067085954, "percentage": 91.38, "elapsed_time": "0:53:10", "remaining_time": "0:05:00", "throughput": 3573.94, "total_tokens": 11401096}
3506
+ {"current_steps": 17440, "total_steps": 19080, "loss": 4.6331, "lr": 1.1182032507814354e-06, "epoch": 9.140461215932914, "percentage": 91.4, "elapsed_time": "0:53:10", "remaining_time": "0:05:00", "throughput": 3574.06, "total_tokens": 11404616}
3507
+ {"current_steps": 17445, "total_steps": 19080, "loss": 4.3902, "lr": 1.1114503435228434e-06, "epoch": 9.14308176100629, "percentage": 91.43, "elapsed_time": "0:53:11", "remaining_time": "0:04:59", "throughput": 3574.15, "total_tokens": 11407848}
3508
+ {"current_steps": 17450, "total_steps": 19080, "loss": 4.9676, "lr": 1.1047174251114234e-06, "epoch": 9.145702306079665, "percentage": 91.46, "elapsed_time": "0:53:12", "remaining_time": "0:04:58", "throughput": 3574.19, "total_tokens": 11410760}
3509
+ {"current_steps": 17455, "total_steps": 19080, "loss": 4.6585, "lr": 1.0980045011809604e-06, "epoch": 9.14832285115304, "percentage": 91.48, "elapsed_time": "0:53:13", "remaining_time": "0:04:57", "throughput": 3574.27, "total_tokens": 11413864}
3510
+ {"current_steps": 17460, "total_steps": 19080, "loss": 4.3511, "lr": 1.0913115773485388e-06, "epoch": 9.150943396226415, "percentage": 91.51, "elapsed_time": "0:53:14", "remaining_time": "0:04:56", "throughput": 3574.37, "total_tokens": 11416808}
3511
+ {"current_steps": 17465, "total_steps": 19080, "loss": 4.8272, "lr": 1.084638659214482e-06, "epoch": 9.15356394129979, "percentage": 91.54, "elapsed_time": "0:53:14", "remaining_time": "0:04:55", "throughput": 3574.47, "total_tokens": 11419944}
3512
+ {"current_steps": 17470, "total_steps": 19080, "loss": 4.485, "lr": 1.0779857523623815e-06, "epoch": 9.156184486373165, "percentage": 91.56, "elapsed_time": "0:53:15", "remaining_time": "0:04:54", "throughput": 3574.5, "total_tokens": 11422728}
3513
+ {"current_steps": 17475, "total_steps": 19080, "loss": 5.3814, "lr": 1.071352862359093e-06, "epoch": 9.15880503144654, "percentage": 91.59, "elapsed_time": "0:53:17", "remaining_time": "0:04:53", "throughput": 3574.96, "total_tokens": 11430984}
3514
+ {"current_steps": 17480, "total_steps": 19080, "loss": 4.9479, "lr": 1.0647399947547127e-06, "epoch": 9.161425576519916, "percentage": 91.61, "elapsed_time": "0:53:18", "remaining_time": "0:04:52", "throughput": 3575.03, "total_tokens": 11433672}
3515
+ {"current_steps": 17485, "total_steps": 19080, "loss": 4.6195, "lr": 1.0581471550825812e-06, "epoch": 9.164046121593291, "percentage": 91.64, "elapsed_time": "0:53:18", "remaining_time": "0:04:51", "throughput": 3575.06, "total_tokens": 11436168}
3516
+ {"current_steps": 17490, "total_steps": 19080, "loss": 4.1144, "lr": 1.0515743488592939e-06, "epoch": 9.166666666666666, "percentage": 91.67, "elapsed_time": "0:53:19", "remaining_time": "0:04:50", "throughput": 3575.19, "total_tokens": 11439528}
3517
+ {"current_steps": 17495, "total_steps": 19080, "loss": 4.5112, "lr": 1.0450215815846736e-06, "epoch": 9.169287211740041, "percentage": 91.69, "elapsed_time": "0:53:20", "remaining_time": "0:04:49", "throughput": 3575.25, "total_tokens": 11442312}
3518
+ {"current_steps": 17500, "total_steps": 19080, "loss": 4.8115, "lr": 1.0384888587417736e-06, "epoch": 9.171907756813416, "percentage": 91.72, "elapsed_time": "0:53:21", "remaining_time": "0:04:49", "throughput": 3575.34, "total_tokens": 11445416}
3519
+ {"current_steps": 17505, "total_steps": 19080, "loss": 4.4227, "lr": 1.0319761857968735e-06, "epoch": 9.174528301886792, "percentage": 91.75, "elapsed_time": "0:53:22", "remaining_time": "0:04:48", "throughput": 3575.47, "total_tokens": 11448712}
3520
+ {"current_steps": 17510, "total_steps": 19080, "loss": 4.5735, "lr": 1.0254835681994895e-06, "epoch": 9.177148846960169, "percentage": 91.77, "elapsed_time": "0:53:22", "remaining_time": "0:04:47", "throughput": 3575.57, "total_tokens": 11452104}
3521
+ {"current_steps": 17515, "total_steps": 19080, "loss": 4.9944, "lr": 1.0190110113823426e-06, "epoch": 9.179769392033544, "percentage": 91.8, "elapsed_time": "0:53:23", "remaining_time": "0:04:46", "throughput": 3575.75, "total_tokens": 11456136}
3522
+ {"current_steps": 17520, "total_steps": 19080, "loss": 4.1644, "lr": 1.0125585207613752e-06, "epoch": 9.182389937106919, "percentage": 91.82, "elapsed_time": "0:53:24", "remaining_time": "0:04:45", "throughput": 3575.85, "total_tokens": 11459272}
3523
+ {"current_steps": 17525, "total_steps": 19080, "loss": 4.7309, "lr": 1.0061261017357327e-06, "epoch": 9.185010482180294, "percentage": 91.85, "elapsed_time": "0:53:25", "remaining_time": "0:04:44", "throughput": 3576.02, "total_tokens": 11463208}
3524
+ {"current_steps": 17530, "total_steps": 19080, "loss": 4.6128, "lr": 9.997137596877732e-07, "epoch": 9.18763102725367, "percentage": 91.88, "elapsed_time": "0:53:26", "remaining_time": "0:04:43", "throughput": 3576.13, "total_tokens": 11466472}
3525
+ {"current_steps": 17535, "total_steps": 19080, "loss": 5.3524, "lr": 9.93321499983052e-07, "epoch": 9.190251572327044, "percentage": 91.9, "elapsed_time": "0:53:27", "remaining_time": "0:04:42", "throughput": 3576.22, "total_tokens": 11469192}
3526
+ {"current_steps": 17540, "total_steps": 19080, "loss": 4.379, "lr": 9.869493279703158e-07, "epoch": 9.19287211740042, "percentage": 91.93, "elapsed_time": "0:53:27", "remaining_time": "0:04:41", "throughput": 3576.29, "total_tokens": 11472232}
3527
+ {"current_steps": 17545, "total_steps": 19080, "loss": 4.436, "lr": 9.805972489815102e-07, "epoch": 9.195492662473795, "percentage": 91.95, "elapsed_time": "0:53:28", "remaining_time": "0:04:40", "throughput": 3576.43, "total_tokens": 11476040}
3528
+ {"current_steps": 17550, "total_steps": 19080, "loss": 4.5329, "lr": 9.742652683317643e-07, "epoch": 9.19811320754717, "percentage": 91.98, "elapsed_time": "0:53:29", "remaining_time": "0:04:39", "throughput": 3576.47, "total_tokens": 11478728}
3529
+ {"current_steps": 17555, "total_steps": 19080, "loss": 4.7362, "lr": 9.679533913193927e-07, "epoch": 9.200733752620545, "percentage": 92.01, "elapsed_time": "0:53:30", "remaining_time": "0:04:38", "throughput": 3576.52, "total_tokens": 11481448}
3530
+ {"current_steps": 17560, "total_steps": 19080, "loss": 4.3449, "lr": 9.61661623225879e-07, "epoch": 9.20335429769392, "percentage": 92.03, "elapsed_time": "0:53:30", "remaining_time": "0:04:37", "throughput": 3576.62, "total_tokens": 11484424}
3531
+ {"current_steps": 17565, "total_steps": 19080, "loss": 4.8636, "lr": 9.553899693158951e-07, "epoch": 9.205974842767295, "percentage": 92.06, "elapsed_time": "0:53:31", "remaining_time": "0:04:37", "throughput": 3576.71, "total_tokens": 11487560}
3532
+ {"current_steps": 17570, "total_steps": 19080, "loss": 5.0432, "lr": 9.491384348372684e-07, "epoch": 9.20859538784067, "percentage": 92.09, "elapsed_time": "0:53:32", "remaining_time": "0:04:36", "throughput": 3576.79, "total_tokens": 11490632}
3533
+ {"current_steps": 17575, "total_steps": 19080, "loss": 4.439, "lr": 9.429070250210004e-07, "epoch": 9.211215932914046, "percentage": 92.11, "elapsed_time": "0:53:33", "remaining_time": "0:04:35", "throughput": 3576.83, "total_tokens": 11493032}
3534
+ {"current_steps": 17580, "total_steps": 19080, "loss": 5.0909, "lr": 9.366957450812535e-07, "epoch": 9.213836477987421, "percentage": 92.14, "elapsed_time": "0:53:33", "remaining_time": "0:04:34", "throughput": 3576.92, "total_tokens": 11496200}
3535
+ {"current_steps": 17585, "total_steps": 19080, "loss": 4.0487, "lr": 9.305046002153345e-07, "epoch": 9.216457023060796, "percentage": 92.16, "elapsed_time": "0:53:34", "remaining_time": "0:04:33", "throughput": 3576.96, "total_tokens": 11498696}
3536
+ {"current_steps": 17590, "total_steps": 19080, "loss": 4.8605, "lr": 9.243335956037186e-07, "epoch": 9.219077568134171, "percentage": 92.19, "elapsed_time": "0:53:35", "remaining_time": "0:04:32", "throughput": 3577.06, "total_tokens": 11502312}
3537
+ {"current_steps": 17595, "total_steps": 19080, "loss": 4.4524, "lr": 9.181827364100171e-07, "epoch": 9.221698113207546, "percentage": 92.22, "elapsed_time": "0:53:36", "remaining_time": "0:04:31", "throughput": 3577.12, "total_tokens": 11505160}
3538
+ {"current_steps": 17600, "total_steps": 19080, "loss": 4.4516, "lr": 9.120520277809852e-07, "epoch": 9.224318658280922, "percentage": 92.24, "elapsed_time": "0:53:37", "remaining_time": "0:04:30", "throughput": 3577.18, "total_tokens": 11508456}
3539
+ {"current_steps": 17605, "total_steps": 19080, "loss": 4.9652, "lr": 9.059414748465278e-07, "epoch": 9.226939203354299, "percentage": 92.27, "elapsed_time": "0:53:38", "remaining_time": "0:04:29", "throughput": 3577.3, "total_tokens": 11511816}
3540
+ {"current_steps": 17610, "total_steps": 19080, "loss": 5.1351, "lr": 8.998510827196715e-07, "epoch": 9.229559748427674, "percentage": 92.3, "elapsed_time": "0:53:38", "remaining_time": "0:04:28", "throughput": 3577.3, "total_tokens": 11514088}
3541
+ {"current_steps": 17615, "total_steps": 19080, "loss": 4.6463, "lr": 8.937808564965733e-07, "epoch": 9.232180293501049, "percentage": 92.32, "elapsed_time": "0:53:39", "remaining_time": "0:04:27", "throughput": 3577.38, "total_tokens": 11517096}
3542
+ {"current_steps": 17620, "total_steps": 19080, "loss": 4.1463, "lr": 8.877308012565339e-07, "epoch": 9.234800838574424, "percentage": 92.35, "elapsed_time": "0:53:40", "remaining_time": "0:04:26", "throughput": 3577.46, "total_tokens": 11520168}
3543
+ {"current_steps": 17625, "total_steps": 19080, "loss": 5.0439, "lr": 8.817009220619482e-07, "epoch": 9.2374213836478, "percentage": 92.37, "elapsed_time": "0:53:41", "remaining_time": "0:04:25", "throughput": 3577.63, "total_tokens": 11523944}
3544
+ {"current_steps": 17630, "total_steps": 19080, "loss": 5.4636, "lr": 8.756912239583554e-07, "epoch": 9.240041928721174, "percentage": 92.4, "elapsed_time": "0:53:42", "remaining_time": "0:04:24", "throughput": 3577.78, "total_tokens": 11527720}
3545
+ {"current_steps": 17635, "total_steps": 19080, "loss": 4.8167, "lr": 8.697017119743911e-07, "epoch": 9.24266247379455, "percentage": 92.43, "elapsed_time": "0:53:42", "remaining_time": "0:04:24", "throughput": 3577.91, "total_tokens": 11531304}
3546
+ {"current_steps": 17640, "total_steps": 19080, "loss": 4.9497, "lr": 8.637323911218048e-07, "epoch": 9.245283018867925, "percentage": 92.45, "elapsed_time": "0:53:43", "remaining_time": "0:04:23", "throughput": 3578.05, "total_tokens": 11535176}
3547
+ {"current_steps": 17645, "total_steps": 19080, "loss": 4.1837, "lr": 8.577832663954538e-07, "epoch": 9.2479035639413, "percentage": 92.48, "elapsed_time": "0:53:44", "remaining_time": "0:04:22", "throughput": 3578.21, "total_tokens": 11539016}
3548
+ {"current_steps": 17650, "total_steps": 19080, "loss": 4.5938, "lr": 8.51854342773295e-07, "epoch": 9.250524109014675, "percentage": 92.51, "elapsed_time": "0:53:45", "remaining_time": "0:04:21", "throughput": 3578.45, "total_tokens": 11543752}
3549
+ {"current_steps": 17655, "total_steps": 19080, "loss": 4.908, "lr": 8.459456252163739e-07, "epoch": 9.25314465408805, "percentage": 92.53, "elapsed_time": "0:53:46", "remaining_time": "0:04:20", "throughput": 3578.52, "total_tokens": 11546664}
3550
+ {"current_steps": 17660, "total_steps": 19080, "loss": 4.61, "lr": 8.400571186688466e-07, "epoch": 9.255765199161425, "percentage": 92.56, "elapsed_time": "0:53:47", "remaining_time": "0:04:19", "throughput": 3578.55, "total_tokens": 11549032}
3551
+ {"current_steps": 17665, "total_steps": 19080, "loss": 5.1138, "lr": 8.341888280579386e-07, "epoch": 9.2583857442348, "percentage": 92.58, "elapsed_time": "0:53:48", "remaining_time": "0:04:18", "throughput": 3578.67, "total_tokens": 11552328}
3552
+ {"current_steps": 17670, "total_steps": 19080, "loss": 4.7185, "lr": 8.283407582939689e-07, "epoch": 9.261006289308176, "percentage": 92.61, "elapsed_time": "0:53:48", "remaining_time": "0:04:17", "throughput": 3578.74, "total_tokens": 11555464}
3553
+ {"current_steps": 17675, "total_steps": 19080, "loss": 4.7221, "lr": 8.22512914270332e-07, "epoch": 9.26362683438155, "percentage": 92.64, "elapsed_time": "0:53:49", "remaining_time": "0:04:16", "throughput": 3578.78, "total_tokens": 11558088}
3554
+ {"current_steps": 17680, "total_steps": 19080, "loss": 4.4545, "lr": 8.167053008635101e-07, "epoch": 9.266247379454926, "percentage": 92.66, "elapsed_time": "0:53:50", "remaining_time": "0:04:15", "throughput": 3578.85, "total_tokens": 11560872}
3555
+ {"current_steps": 17685, "total_steps": 19080, "loss": 4.2644, "lr": 8.109179229330438e-07, "epoch": 9.268867924528301, "percentage": 92.69, "elapsed_time": "0:53:51", "remaining_time": "0:04:14", "throughput": 3578.99, "total_tokens": 11564264}
3556
+ {"current_steps": 17690, "total_steps": 19080, "loss": 5.0274, "lr": 8.051507853215401e-07, "epoch": 9.271488469601676, "percentage": 92.71, "elapsed_time": "0:53:51", "remaining_time": "0:04:13", "throughput": 3579.11, "total_tokens": 11567656}
3557
+ {"current_steps": 17695, "total_steps": 19080, "loss": 4.6356, "lr": 7.994038928546887e-07, "epoch": 9.274109014675052, "percentage": 92.74, "elapsed_time": "0:53:52", "remaining_time": "0:04:13", "throughput": 3579.23, "total_tokens": 11571176}
3558
+ {"current_steps": 17700, "total_steps": 19080, "loss": 4.8219, "lr": 7.93677250341221e-07, "epoch": 9.276729559748428, "percentage": 92.77, "elapsed_time": "0:53:53", "remaining_time": "0:04:12", "throughput": 3579.31, "total_tokens": 11574216}
3559
+ {"current_steps": 17705, "total_steps": 19080, "loss": 5.4325, "lr": 7.879708625729287e-07, "epoch": 9.279350104821804, "percentage": 92.79, "elapsed_time": "0:53:54", "remaining_time": "0:04:11", "throughput": 3579.4, "total_tokens": 11577608}
3560
+ {"current_steps": 17710, "total_steps": 19080, "loss": 5.1345, "lr": 7.822847343246564e-07, "epoch": 9.281970649895179, "percentage": 92.82, "elapsed_time": "0:53:55", "remaining_time": "0:04:10", "throughput": 3579.51, "total_tokens": 11581000}
3561
+ {"current_steps": 17715, "total_steps": 19080, "loss": 4.9794, "lr": 7.766188703542954e-07, "epoch": 9.284591194968554, "percentage": 92.85, "elapsed_time": "0:53:56", "remaining_time": "0:04:09", "throughput": 3579.64, "total_tokens": 11584840}
3562
+ {"current_steps": 17720, "total_steps": 19080, "loss": 4.4785, "lr": 7.709732754027866e-07, "epoch": 9.28721174004193, "percentage": 92.87, "elapsed_time": "0:53:57", "remaining_time": "0:04:08", "throughput": 3579.74, "total_tokens": 11587912}
3563
+ {"current_steps": 17725, "total_steps": 19080, "loss": 4.7322, "lr": 7.653479541941038e-07, "epoch": 9.289832285115304, "percentage": 92.9, "elapsed_time": "0:53:58", "remaining_time": "0:04:07", "throughput": 3579.91, "total_tokens": 11591752}
3564
+ {"current_steps": 17730, "total_steps": 19080, "loss": 4.8923, "lr": 7.597429114352572e-07, "epoch": 9.29245283018868, "percentage": 92.92, "elapsed_time": "0:53:58", "remaining_time": "0:04:06", "throughput": 3579.94, "total_tokens": 11594248}
3565
+ {"current_steps": 17735, "total_steps": 19080, "loss": 4.5835, "lr": 7.541581518162922e-07, "epoch": 9.295073375262055, "percentage": 92.95, "elapsed_time": "0:53:59", "remaining_time": "0:04:05", "throughput": 3580.01, "total_tokens": 11597448}
3566
+ {"current_steps": 17740, "total_steps": 19080, "loss": 4.8804, "lr": 7.485936800102788e-07, "epoch": 9.29769392033543, "percentage": 92.98, "elapsed_time": "0:54:00", "remaining_time": "0:04:04", "throughput": 3580.08, "total_tokens": 11600360}
3567
+ {"current_steps": 17745, "total_steps": 19080, "loss": 4.2884, "lr": 7.430495006733152e-07, "epoch": 9.300314465408805, "percentage": 93.0, "elapsed_time": "0:54:01", "remaining_time": "0:04:03", "throughput": 3580.17, "total_tokens": 11603528}
3568
+ {"current_steps": 17750, "total_steps": 19080, "loss": 4.9263, "lr": 7.375256184445178e-07, "epoch": 9.30293501048218, "percentage": 93.03, "elapsed_time": "0:54:01", "remaining_time": "0:04:02", "throughput": 3580.24, "total_tokens": 11606600}
3569
+ {"current_steps": 17755, "total_steps": 19080, "loss": 4.2094, "lr": 7.320220379460146e-07, "epoch": 9.305555555555555, "percentage": 93.06, "elapsed_time": "0:54:02", "remaining_time": "0:04:01", "throughput": 3580.28, "total_tokens": 11609064}
3570
+ {"current_steps": 17760, "total_steps": 19080, "loss": 4.515, "lr": 7.265387637829524e-07, "epoch": 9.30817610062893, "percentage": 93.08, "elapsed_time": "0:54:03", "remaining_time": "0:04:01", "throughput": 3580.37, "total_tokens": 11612328}
3571
+ {"current_steps": 17765, "total_steps": 19080, "loss": 4.9506, "lr": 7.210758005434887e-07, "epoch": 9.310796645702306, "percentage": 93.11, "elapsed_time": "0:54:04", "remaining_time": "0:04:00", "throughput": 3580.47, "total_tokens": 11615912}
3572
+ {"current_steps": 17770, "total_steps": 19080, "loss": 4.9355, "lr": 7.156331527987753e-07, "epoch": 9.31341719077568, "percentage": 93.13, "elapsed_time": "0:54:04", "remaining_time": "0:03:59", "throughput": 3580.56, "total_tokens": 11618888}
3573
+ {"current_steps": 17775, "total_steps": 19080, "loss": 4.873, "lr": 7.102108251029777e-07, "epoch": 9.316037735849056, "percentage": 93.16, "elapsed_time": "0:54:05", "remaining_time": "0:03:58", "throughput": 3580.6, "total_tokens": 11621544}
3574
+ {"current_steps": 17780, "total_steps": 19080, "loss": 4.7155, "lr": 7.04808821993247e-07, "epoch": 9.318658280922431, "percentage": 93.19, "elapsed_time": "0:54:06", "remaining_time": "0:03:57", "throughput": 3580.63, "total_tokens": 11624040}
3575
+ {"current_steps": 17785, "total_steps": 19080, "loss": 4.9008, "lr": 6.994271479897314e-07, "epoch": 9.321278825995806, "percentage": 93.21, "elapsed_time": "0:54:07", "remaining_time": "0:03:56", "throughput": 3580.68, "total_tokens": 11626728}
3576
+ {"current_steps": 17790, "total_steps": 19080, "loss": 4.632, "lr": 6.940658075955759e-07, "epoch": 9.323899371069182, "percentage": 93.24, "elapsed_time": "0:54:07", "remaining_time": "0:03:55", "throughput": 3580.77, "total_tokens": 11629832}
3577
+ {"current_steps": 17795, "total_steps": 19080, "loss": 4.7675, "lr": 6.887248052969003e-07, "epoch": 9.326519916142558, "percentage": 93.27, "elapsed_time": "0:54:08", "remaining_time": "0:03:54", "throughput": 3580.91, "total_tokens": 11633320}
3578
+ {"current_steps": 17800, "total_steps": 19080, "loss": 4.7699, "lr": 6.834041455628104e-07, "epoch": 9.329140461215934, "percentage": 93.29, "elapsed_time": "0:54:09", "remaining_time": "0:03:53", "throughput": 3580.98, "total_tokens": 11636104}
3579
+ {"current_steps": 17805, "total_steps": 19080, "loss": 4.4308, "lr": 6.781038328454003e-07, "epoch": 9.331761006289309, "percentage": 93.32, "elapsed_time": "0:54:10", "remaining_time": "0:03:52", "throughput": 3581.1, "total_tokens": 11639752}
3580
+ {"current_steps": 17810, "total_steps": 19080, "loss": 4.8927, "lr": 6.728238715797169e-07, "epoch": 9.334381551362684, "percentage": 93.34, "elapsed_time": "0:54:11", "remaining_time": "0:03:51", "throughput": 3581.16, "total_tokens": 11642664}
3581
+ {"current_steps": 17815, "total_steps": 19080, "loss": 5.0511, "lr": 6.675642661838011e-07, "epoch": 9.33700209643606, "percentage": 93.37, "elapsed_time": "0:54:11", "remaining_time": "0:03:50", "throughput": 3581.26, "total_tokens": 11646024}
3582
+ {"current_steps": 17820, "total_steps": 19080, "loss": 4.9122, "lr": 6.623250210586463e-07, "epoch": 9.339622641509434, "percentage": 93.4, "elapsed_time": "0:54:12", "remaining_time": "0:03:49", "throughput": 3581.35, "total_tokens": 11649192}
3583
+ {"current_steps": 17825, "total_steps": 19080, "loss": 5.2047, "lr": 6.571061405882095e-07, "epoch": 9.34224318658281, "percentage": 93.42, "elapsed_time": "0:54:13", "remaining_time": "0:03:49", "throughput": 3581.51, "total_tokens": 11653192}
3584
+ {"current_steps": 17830, "total_steps": 19080, "loss": 4.767, "lr": 6.519076291394172e-07, "epoch": 9.344863731656185, "percentage": 93.45, "elapsed_time": "0:54:14", "remaining_time": "0:03:48", "throughput": 3581.69, "total_tokens": 11657224}
3585
+ {"current_steps": 17835, "total_steps": 19080, "loss": 4.8745, "lr": 6.467294910621452e-07, "epoch": 9.34748427672956, "percentage": 93.47, "elapsed_time": "0:54:15", "remaining_time": "0:03:47", "throughput": 3581.77, "total_tokens": 11660328}
3586
+ {"current_steps": 17840, "total_steps": 19080, "loss": 4.826, "lr": 6.415717306892193e-07, "epoch": 9.350104821802935, "percentage": 93.5, "elapsed_time": "0:54:16", "remaining_time": "0:03:46", "throughput": 3581.86, "total_tokens": 11663400}
3587
+ {"current_steps": 17845, "total_steps": 19080, "loss": 4.678, "lr": 6.364343523364263e-07, "epoch": 9.35272536687631, "percentage": 93.53, "elapsed_time": "0:54:16", "remaining_time": "0:03:45", "throughput": 3581.93, "total_tokens": 11665992}
3588
+ {"current_steps": 17850, "total_steps": 19080, "loss": 4.2315, "lr": 6.313173603024802e-07, "epoch": 9.355345911949685, "percentage": 93.55, "elapsed_time": "0:54:17", "remaining_time": "0:03:44", "throughput": 3582.01, "total_tokens": 11669000}
3589
+ {"current_steps": 17855, "total_steps": 19080, "loss": 4.5663, "lr": 6.262207588690533e-07, "epoch": 9.35796645702306, "percentage": 93.58, "elapsed_time": "0:54:18", "remaining_time": "0:03:43", "throughput": 3582.08, "total_tokens": 11671976}
3590
+ {"current_steps": 17860, "total_steps": 19080, "loss": 4.8545, "lr": 6.211445523007398e-07, "epoch": 9.360587002096436, "percentage": 93.61, "elapsed_time": "0:54:19", "remaining_time": "0:03:42", "throughput": 3582.14, "total_tokens": 11674984}
3591
+ {"current_steps": 17865, "total_steps": 19080, "loss": 4.4912, "lr": 6.160887448450892e-07, "epoch": 9.36320754716981, "percentage": 93.63, "elapsed_time": "0:54:19", "remaining_time": "0:03:41", "throughput": 3582.19, "total_tokens": 11677864}
3592
+ {"current_steps": 17870, "total_steps": 19080, "loss": 4.12, "lr": 6.11053340732562e-07, "epoch": 9.365828092243186, "percentage": 93.66, "elapsed_time": "0:54:20", "remaining_time": "0:03:40", "throughput": 3582.31, "total_tokens": 11681128}
3593
+ {"current_steps": 17875, "total_steps": 19080, "loss": 4.7106, "lr": 6.060383441765544e-07, "epoch": 9.368448637316561, "percentage": 93.68, "elapsed_time": "0:54:21", "remaining_time": "0:03:39", "throughput": 3582.37, "total_tokens": 11683880}
3594
+ {"current_steps": 17880, "total_steps": 19080, "loss": 4.8576, "lr": 6.01043759373393e-07, "epoch": 9.371069182389936, "percentage": 93.71, "elapsed_time": "0:54:22", "remaining_time": "0:03:38", "throughput": 3582.49, "total_tokens": 11687496}
3595
+ {"current_steps": 17885, "total_steps": 19080, "loss": 4.9831, "lr": 5.960695905023128e-07, "epoch": 9.373689727463312, "percentage": 93.74, "elapsed_time": "0:54:23", "remaining_time": "0:03:38", "throughput": 3582.58, "total_tokens": 11690696}
3596
+ {"current_steps": 17890, "total_steps": 19080, "loss": 4.3041, "lr": 5.91115841725473e-07, "epoch": 9.376310272536688, "percentage": 93.76, "elapsed_time": "0:54:23", "remaining_time": "0:03:37", "throughput": 3582.64, "total_tokens": 11693544}
3597
+ {"current_steps": 17895, "total_steps": 19080, "loss": 4.5929, "lr": 5.861825171879415e-07, "epoch": 9.378930817610064, "percentage": 93.79, "elapsed_time": "0:54:24", "remaining_time": "0:03:36", "throughput": 3582.72, "total_tokens": 11696840}
3598
+ {"current_steps": 17900, "total_steps": 19080, "loss": 4.9579, "lr": 5.812696210177021e-07, "epoch": 9.381551362683439, "percentage": 93.82, "elapsed_time": "0:54:25", "remaining_time": "0:03:35", "throughput": 3582.8, "total_tokens": 11700264}
3599
+ {"current_steps": 17905, "total_steps": 19080, "loss": 4.0998, "lr": 5.763771573256415e-07, "epoch": 9.384171907756814, "percentage": 93.84, "elapsed_time": "0:54:26", "remaining_time": "0:03:34", "throughput": 3582.89, "total_tokens": 11703496}
3600
+ {"current_steps": 17910, "total_steps": 19080, "loss": 5.0112, "lr": 5.715051302055491e-07, "epoch": 9.38679245283019, "percentage": 93.87, "elapsed_time": "0:54:27", "remaining_time": "0:03:33", "throughput": 3582.98, "total_tokens": 11706664}
3601
+ {"current_steps": 17915, "total_steps": 19080, "loss": 4.8623, "lr": 5.666535437341108e-07, "epoch": 9.389412997903564, "percentage": 93.89, "elapsed_time": "0:54:28", "remaining_time": "0:03:32", "throughput": 3583.05, "total_tokens": 11709480}
3602
+ {"current_steps": 17920, "total_steps": 19080, "loss": 4.7276, "lr": 5.618224019709212e-07, "epoch": 9.39203354297694, "percentage": 93.92, "elapsed_time": "0:54:28", "remaining_time": "0:03:31", "throughput": 3583.15, "total_tokens": 11712712}
3603
+ {"current_steps": 17925, "total_steps": 19080, "loss": 5.2687, "lr": 5.570117089584548e-07, "epoch": 9.394654088050315, "percentage": 93.95, "elapsed_time": "0:54:29", "remaining_time": "0:03:30", "throughput": 3583.18, "total_tokens": 11715464}
3604
+ {"current_steps": 17930, "total_steps": 19080, "loss": 5.3163, "lr": 5.522214687220751e-07, "epoch": 9.39727463312369, "percentage": 93.97, "elapsed_time": "0:54:30", "remaining_time": "0:03:29", "throughput": 3583.25, "total_tokens": 11718120}
3605
+ {"current_steps": 17935, "total_steps": 19080, "loss": 4.6542, "lr": 5.474516852700451e-07, "epoch": 9.399895178197065, "percentage": 94.0, "elapsed_time": "0:54:31", "remaining_time": "0:03:28", "throughput": 3583.36, "total_tokens": 11721512}
3606
+ {"current_steps": 17940, "total_steps": 19080, "loss": 4.0778, "lr": 5.427023625934946e-07, "epoch": 9.40251572327044, "percentage": 94.03, "elapsed_time": "0:54:31", "remaining_time": "0:03:27", "throughput": 3583.39, "total_tokens": 11724008}
3607
+ {"current_steps": 17945, "total_steps": 19080, "loss": 4.6752, "lr": 5.379735046664419e-07, "epoch": 9.405136268343815, "percentage": 94.05, "elapsed_time": "0:54:32", "remaining_time": "0:03:26", "throughput": 3583.42, "total_tokens": 11726696}
3608
+ {"current_steps": 17950, "total_steps": 19080, "loss": 5.0386, "lr": 5.33265115445783e-07, "epoch": 9.40775681341719, "percentage": 94.08, "elapsed_time": "0:54:33", "remaining_time": "0:03:26", "throughput": 3583.56, "total_tokens": 11730408}
3609
+ {"current_steps": 17955, "total_steps": 19080, "loss": 5.1352, "lr": 5.285771988712746e-07, "epoch": 9.410377358490566, "percentage": 94.1, "elapsed_time": "0:54:34", "remaining_time": "0:03:25", "throughput": 3583.61, "total_tokens": 11733320}
3610
+ {"current_steps": 17960, "total_steps": 19080, "loss": 4.9984, "lr": 5.239097588655595e-07, "epoch": 9.41299790356394, "percentage": 94.13, "elapsed_time": "0:54:35", "remaining_time": "0:03:24", "throughput": 3583.86, "total_tokens": 11738472}
3611
+ {"current_steps": 17965, "total_steps": 19080, "loss": 5.0667, "lr": 5.192627993341359e-07, "epoch": 9.415618448637316, "percentage": 94.16, "elapsed_time": "0:54:36", "remaining_time": "0:03:23", "throughput": 3583.9, "total_tokens": 11741032}
3612
+ {"current_steps": 17970, "total_steps": 19080, "loss": 5.2111, "lr": 5.146363241653657e-07, "epoch": 9.418238993710691, "percentage": 94.18, "elapsed_time": "0:54:36", "remaining_time": "0:03:22", "throughput": 3584.0, "total_tokens": 11744328}
3613
+ {"current_steps": 17975, "total_steps": 19080, "loss": 5.1963, "lr": 5.100303372304716e-07, "epoch": 9.420859538784066, "percentage": 94.21, "elapsed_time": "0:54:37", "remaining_time": "0:03:21", "throughput": 3584.11, "total_tokens": 11747976}
3614
+ {"current_steps": 17980, "total_steps": 19080, "loss": 4.7959, "lr": 5.054448423835373e-07, "epoch": 9.423480083857442, "percentage": 94.23, "elapsed_time": "0:54:38", "remaining_time": "0:03:20", "throughput": 3584.22, "total_tokens": 11751144}
3615
+ {"current_steps": 17985, "total_steps": 19080, "loss": 4.6962, "lr": 5.008798434614908e-07, "epoch": 9.426100628930818, "percentage": 94.26, "elapsed_time": "0:54:39", "remaining_time": "0:03:19", "throughput": 3584.3, "total_tokens": 11754312}
3616
+ {"current_steps": 17990, "total_steps": 19080, "loss": 4.2372, "lr": 4.963353442841156e-07, "epoch": 9.428721174004194, "percentage": 94.29, "elapsed_time": "0:54:40", "remaining_time": "0:03:18", "throughput": 3584.44, "total_tokens": 11757896}
3617
+ {"current_steps": 17995, "total_steps": 19080, "loss": 4.6567, "lr": 4.918113486540393e-07, "epoch": 9.431341719077569, "percentage": 94.31, "elapsed_time": "0:54:41", "remaining_time": "0:03:17", "throughput": 3584.51, "total_tokens": 11760840}
3618
+ {"current_steps": 18000, "total_steps": 19080, "loss": 5.1522, "lr": 4.873078603567421e-07, "epoch": 9.433962264150944, "percentage": 94.34, "elapsed_time": "0:54:41", "remaining_time": "0:03:16", "throughput": 3584.61, "total_tokens": 11764136}
3619
+ {"current_steps": 18005, "total_steps": 19080, "loss": 4.5166, "lr": 4.828248831605292e-07, "epoch": 9.43658280922432, "percentage": 94.37, "elapsed_time": "0:54:42", "remaining_time": "0:03:15", "throughput": 3584.67, "total_tokens": 11766696}
3620
+ {"current_steps": 18010, "total_steps": 19080, "loss": 5.0052, "lr": 4.783624208165554e-07, "epoch": 9.439203354297694, "percentage": 94.39, "elapsed_time": "0:54:43", "remaining_time": "0:03:15", "throughput": 3584.73, "total_tokens": 11769160}
3621
+ {"current_steps": 18015, "total_steps": 19080, "loss": 4.9698, "lr": 4.739204770588035e-07, "epoch": 9.44182389937107, "percentage": 94.42, "elapsed_time": "0:54:43", "remaining_time": "0:03:14", "throughput": 3584.83, "total_tokens": 11772360}
3622
+ {"current_steps": 18020, "total_steps": 19080, "loss": 4.476, "lr": 4.694990556040918e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "0:54:44", "remaining_time": "0:03:13", "throughput": 3584.89, "total_tokens": 11774984}
3623
+ {"current_steps": 18025, "total_steps": 19080, "loss": 4.7578, "lr": 4.65098160152061e-07, "epoch": 9.44706498951782, "percentage": 94.47, "elapsed_time": "0:54:45", "remaining_time": "0:03:12", "throughput": 3584.93, "total_tokens": 11777928}
3624
+ {"current_steps": 18030, "total_steps": 19080, "loss": 5.0103, "lr": 4.6071779438517924e-07, "epoch": 9.449685534591195, "percentage": 94.5, "elapsed_time": "0:54:46", "remaining_time": "0:03:11", "throughput": 3585.04, "total_tokens": 11781096}
3625
+ {"current_steps": 18035, "total_steps": 19080, "loss": 4.8236, "lr": 4.563579619687369e-07, "epoch": 9.45230607966457, "percentage": 94.52, "elapsed_time": "0:54:46", "remaining_time": "0:03:10", "throughput": 3585.1, "total_tokens": 11784104}
3626
+ {"current_steps": 18040, "total_steps": 19080, "loss": 5.1027, "lr": 4.5201866655084636e-07, "epoch": 9.454926624737945, "percentage": 94.55, "elapsed_time": "0:54:47", "remaining_time": "0:03:09", "throughput": 3585.21, "total_tokens": 11787304}
3627
+ {"current_steps": 18045, "total_steps": 19080, "loss": 4.3268, "lr": 4.4769991176242533e-07, "epoch": 9.45754716981132, "percentage": 94.58, "elapsed_time": "0:54:48", "remaining_time": "0:03:08", "throughput": 3585.25, "total_tokens": 11790024}
3628
+ {"current_steps": 18050, "total_steps": 19080, "loss": 4.7543, "lr": 4.4340170121721645e-07, "epoch": 9.460167714884696, "percentage": 94.6, "elapsed_time": "0:54:49", "remaining_time": "0:03:07", "throughput": 3585.3, "total_tokens": 11792840}
3629
+ {"current_steps": 18055, "total_steps": 19080, "loss": 4.5631, "lr": 4.3912403851176234e-07, "epoch": 9.46278825995807, "percentage": 94.63, "elapsed_time": "0:54:49", "remaining_time": "0:03:06", "throughput": 3585.35, "total_tokens": 11795720}
3630
+ {"current_steps": 18060, "total_steps": 19080, "loss": 4.4589, "lr": 4.348669272254163e-07, "epoch": 9.465408805031446, "percentage": 94.65, "elapsed_time": "0:54:50", "remaining_time": "0:03:05", "throughput": 3585.38, "total_tokens": 11798472}
3631
+ {"current_steps": 18065, "total_steps": 19080, "loss": 4.4908, "lr": 4.306303709203374e-07, "epoch": 9.468029350104821, "percentage": 94.68, "elapsed_time": "0:54:51", "remaining_time": "0:03:04", "throughput": 3585.48, "total_tokens": 11801800}
3632
+ {"current_steps": 18070, "total_steps": 19080, "loss": 4.9893, "lr": 4.264143731414788e-07, "epoch": 9.470649895178196, "percentage": 94.71, "elapsed_time": "0:54:52", "remaining_time": "0:03:04", "throughput": 3585.58, "total_tokens": 11805160}
3633
+ {"current_steps": 18075, "total_steps": 19080, "loss": 5.0055, "lr": 4.2221893741659636e-07, "epoch": 9.473270440251572, "percentage": 94.73, "elapsed_time": "0:54:54", "remaining_time": "0:03:03", "throughput": 3585.84, "total_tokens": 11811816}
3634
+ {"current_steps": 18080, "total_steps": 19080, "loss": 5.0781, "lr": 4.180440672562402e-07, "epoch": 9.475890985324948, "percentage": 94.76, "elapsed_time": "0:54:55", "remaining_time": "0:03:02", "throughput": 3586.02, "total_tokens": 11816040}
3635
+ {"current_steps": 18085, "total_steps": 19080, "loss": 4.8746, "lr": 4.1388976615374665e-07, "epoch": 9.478511530398324, "percentage": 94.79, "elapsed_time": "0:54:55", "remaining_time": "0:03:01", "throughput": 3586.11, "total_tokens": 11818920}
3636
+ {"current_steps": 18090, "total_steps": 19080, "loss": 4.7548, "lr": 4.097560375852516e-07, "epoch": 9.481132075471699, "percentage": 94.81, "elapsed_time": "0:54:56", "remaining_time": "0:03:00", "throughput": 3586.18, "total_tokens": 11821736}
3637
+ {"current_steps": 18095, "total_steps": 19080, "loss": 4.7153, "lr": 4.056428850096661e-07, "epoch": 9.483752620545074, "percentage": 94.84, "elapsed_time": "0:54:57", "remaining_time": "0:02:59", "throughput": 3586.28, "total_tokens": 11825256}
3638
+ {"current_steps": 18100, "total_steps": 19080, "loss": 4.9875, "lr": 4.01550311868687e-07, "epoch": 9.48637316561845, "percentage": 94.86, "elapsed_time": "0:54:58", "remaining_time": "0:02:58", "throughput": 3586.47, "total_tokens": 11829416}
3639
+ {"current_steps": 18105, "total_steps": 19080, "loss": 4.4875, "lr": 3.974783215867972e-07, "epoch": 9.488993710691824, "percentage": 94.89, "elapsed_time": "0:54:59", "remaining_time": "0:02:57", "throughput": 3586.53, "total_tokens": 11832200}
3640
+ {"current_steps": 18110, "total_steps": 19080, "loss": 5.4377, "lr": 3.9342691757124626e-07, "epoch": 9.4916142557652, "percentage": 94.92, "elapsed_time": "0:54:59", "remaining_time": "0:02:56", "throughput": 3586.64, "total_tokens": 11835592}
3641
+ {"current_steps": 18115, "total_steps": 19080, "loss": 5.156, "lr": 3.8939610321206966e-07, "epoch": 9.494234800838575, "percentage": 94.94, "elapsed_time": "0:55:00", "remaining_time": "0:02:55", "throughput": 3586.76, "total_tokens": 11839400}
3642
+ {"current_steps": 18120, "total_steps": 19080, "loss": 5.4055, "lr": 3.853858818820694e-07, "epoch": 9.49685534591195, "percentage": 94.97, "elapsed_time": "0:55:01", "remaining_time": "0:02:54", "throughput": 3586.83, "total_tokens": 11842216}
3643
+ {"current_steps": 18125, "total_steps": 19080, "loss": 4.5529, "lr": 3.8139625693680847e-07, "epoch": 9.499475890985325, "percentage": 94.99, "elapsed_time": "0:55:02", "remaining_time": "0:02:53", "throughput": 3586.89, "total_tokens": 11844936}
3644
+ {"current_steps": 18126, "total_steps": 19080, "eval_loss": 4.819365978240967, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:55:18", "remaining_time": "0:02:54", "throughput": 3569.49, "total_tokens": 11845704}
3645
+ {"current_steps": 18130, "total_steps": 19080, "loss": 5.0405, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "0:55:20", "remaining_time": "0:02:54", "throughput": 3567.84, "total_tokens": 11847912}
3646
+ {"current_steps": 18135, "total_steps": 19080, "loss": 4.7468, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "0:55:22", "remaining_time": "0:02:53", "throughput": 3568.09, "total_tokens": 11853192}