rbelanec commited on
Commit
4699c1a
·
verified ·
1 Parent(s): 4cc4a38

Training in progress, step 9086

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +167 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf09e689bc7ec31294c8062c1995879c4b08ceef1d89ea5b0a3d28f4e71f767e
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8864cf9309bfec094519e0e560fcacb55d787446cd5f6be6c1ae8aa22447e279
3
  size 798032
trainer_log.jsonl CHANGED
@@ -1666,3 +1666,170 @@
1666
  {"current_steps": 8280, "total_steps": 16520, "loss": 0.0993, "lr": 2.924232752549091e-05, "epoch": 10.024213075060533, "percentage": 50.12, "elapsed_time": "0:23:22", "remaining_time": "0:23:16", "throughput": 2418.04, "total_tokens": 3392320}
1667
  {"current_steps": 8285, "total_steps": 16520, "loss": 0.0804, "lr": 2.9216295854829455e-05, "epoch": 10.030266343825666, "percentage": 50.15, "elapsed_time": "0:23:23", "remaining_time": "0:23:15", "throughput": 2417.92, "total_tokens": 3394336}
1668
  {"current_steps": 8290, "total_steps": 16520, "loss": 0.1493, "lr": 2.919025947801886e-05, "epoch": 10.036319612590798, "percentage": 50.18, "elapsed_time": "0:23:24", "remaining_time": "0:23:14", "throughput": 2417.97, "total_tokens": 3396288}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1666
  {"current_steps": 8280, "total_steps": 16520, "loss": 0.0993, "lr": 2.924232752549091e-05, "epoch": 10.024213075060533, "percentage": 50.12, "elapsed_time": "0:23:22", "remaining_time": "0:23:16", "throughput": 2418.04, "total_tokens": 3392320}
1667
  {"current_steps": 8285, "total_steps": 16520, "loss": 0.0804, "lr": 2.9216295854829455e-05, "epoch": 10.030266343825666, "percentage": 50.15, "elapsed_time": "0:23:23", "remaining_time": "0:23:15", "throughput": 2417.92, "total_tokens": 3394336}
1668
  {"current_steps": 8290, "total_steps": 16520, "loss": 0.1493, "lr": 2.919025947801886e-05, "epoch": 10.036319612590798, "percentage": 50.18, "elapsed_time": "0:23:24", "remaining_time": "0:23:14", "throughput": 2417.97, "total_tokens": 3396288}
1669
+ {"current_steps": 8295, "total_steps": 16520, "loss": 0.0891, "lr": 2.9164218424120432e-05, "epoch": 10.042372881355933, "percentage": 50.21, "elapsed_time": "0:23:25", "remaining_time": "0:23:13", "throughput": 2418.01, "total_tokens": 3398432}
1670
+ {"current_steps": 8300, "total_steps": 16520, "loss": 0.066, "lr": 2.9138172722200696e-05, "epoch": 10.048426150121065, "percentage": 50.24, "elapsed_time": "0:23:26", "remaining_time": "0:23:12", "throughput": 2418.05, "total_tokens": 3400320}
1671
+ {"current_steps": 8305, "total_steps": 16520, "loss": 0.1268, "lr": 2.9112122401331375e-05, "epoch": 10.0544794188862, "percentage": 50.27, "elapsed_time": "0:23:27", "remaining_time": "0:23:11", "throughput": 2418.15, "total_tokens": 3402432}
1672
+ {"current_steps": 8310, "total_steps": 16520, "loss": 0.075, "lr": 2.908606749058933e-05, "epoch": 10.060532687651332, "percentage": 50.3, "elapsed_time": "0:23:27", "remaining_time": "0:23:10", "throughput": 2418.23, "total_tokens": 3404512}
1673
+ {"current_steps": 8315, "total_steps": 16520, "loss": 0.1088, "lr": 2.9060008019056566e-05, "epoch": 10.066585956416464, "percentage": 50.33, "elapsed_time": "0:23:28", "remaining_time": "0:23:10", "throughput": 2418.36, "total_tokens": 3406720}
1674
+ {"current_steps": 8320, "total_steps": 16520, "loss": 0.0871, "lr": 2.903394401582017e-05, "epoch": 10.072639225181598, "percentage": 50.36, "elapsed_time": "0:23:29", "remaining_time": "0:23:09", "throughput": 2418.38, "total_tokens": 3408640}
1675
+ {"current_steps": 8325, "total_steps": 16520, "loss": 0.0933, "lr": 2.9007875509972275e-05, "epoch": 10.07869249394673, "percentage": 50.39, "elapsed_time": "0:23:30", "remaining_time": "0:23:08", "throughput": 2418.38, "total_tokens": 3410656}
1676
+ {"current_steps": 8330, "total_steps": 16520, "loss": 0.1254, "lr": 2.8981802530610057e-05, "epoch": 10.084745762711865, "percentage": 50.42, "elapsed_time": "0:23:31", "remaining_time": "0:23:07", "throughput": 2418.44, "total_tokens": 3412704}
1677
+ {"current_steps": 8335, "total_steps": 16520, "loss": 0.1496, "lr": 2.8955725106835686e-05, "epoch": 10.090799031476998, "percentage": 50.45, "elapsed_time": "0:23:31", "remaining_time": "0:23:06", "throughput": 2418.55, "total_tokens": 3414752}
1678
+ {"current_steps": 8340, "total_steps": 16520, "loss": 0.0846, "lr": 2.8929643267756286e-05, "epoch": 10.09685230024213, "percentage": 50.48, "elapsed_time": "0:23:32", "remaining_time": "0:23:05", "throughput": 2418.61, "total_tokens": 3416864}
1679
+ {"current_steps": 8345, "total_steps": 16520, "loss": 0.1765, "lr": 2.8903557042483887e-05, "epoch": 10.102905569007264, "percentage": 50.51, "elapsed_time": "0:23:33", "remaining_time": "0:23:04", "throughput": 2418.67, "total_tokens": 3418848}
1680
+ {"current_steps": 8350, "total_steps": 16520, "loss": 0.0869, "lr": 2.887746646013546e-05, "epoch": 10.108958837772397, "percentage": 50.54, "elapsed_time": "0:23:34", "remaining_time": "0:23:03", "throughput": 2418.77, "total_tokens": 3421024}
1681
+ {"current_steps": 8355, "total_steps": 16520, "loss": 0.1965, "lr": 2.8851371549832813e-05, "epoch": 10.115012106537531, "percentage": 50.58, "elapsed_time": "0:23:35", "remaining_time": "0:23:03", "throughput": 2418.81, "total_tokens": 3423168}
1682
+ {"current_steps": 8360, "total_steps": 16520, "loss": 0.108, "lr": 2.8825272340702574e-05, "epoch": 10.121065375302663, "percentage": 50.61, "elapsed_time": "0:23:36", "remaining_time": "0:23:02", "throughput": 2418.92, "total_tokens": 3425344}
1683
+ {"current_steps": 8365, "total_steps": 16520, "loss": 0.0289, "lr": 2.8799168861876203e-05, "epoch": 10.127118644067796, "percentage": 50.64, "elapsed_time": "0:23:36", "remaining_time": "0:23:01", "throughput": 2418.97, "total_tokens": 3427392}
1684
+ {"current_steps": 8370, "total_steps": 16520, "loss": 0.1557, "lr": 2.8773061142489882e-05, "epoch": 10.13317191283293, "percentage": 50.67, "elapsed_time": "0:23:37", "remaining_time": "0:23:00", "throughput": 2419.03, "total_tokens": 3429248}
1685
+ {"current_steps": 8375, "total_steps": 16520, "loss": 0.0804, "lr": 2.8746949211684577e-05, "epoch": 10.139225181598063, "percentage": 50.7, "elapsed_time": "0:23:38", "remaining_time": "0:22:59", "throughput": 2419.12, "total_tokens": 3431296}
1686
+ {"current_steps": 8380, "total_steps": 16520, "loss": 0.1264, "lr": 2.872083309860591e-05, "epoch": 10.145278450363197, "percentage": 50.73, "elapsed_time": "0:23:39", "remaining_time": "0:22:58", "throughput": 2419.21, "total_tokens": 3433408}
1687
+ {"current_steps": 8385, "total_steps": 16520, "loss": 0.0744, "lr": 2.8694712832404198e-05, "epoch": 10.15133171912833, "percentage": 50.76, "elapsed_time": "0:23:40", "remaining_time": "0:22:57", "throughput": 2419.3, "total_tokens": 3435648}
1688
+ {"current_steps": 8390, "total_steps": 16520, "loss": 0.1268, "lr": 2.8668588442234373e-05, "epoch": 10.157384987893462, "percentage": 50.79, "elapsed_time": "0:23:40", "remaining_time": "0:22:56", "throughput": 2419.34, "total_tokens": 3437600}
1689
+ {"current_steps": 8395, "total_steps": 16520, "loss": 0.0868, "lr": 2.8642459957255996e-05, "epoch": 10.163438256658596, "percentage": 50.82, "elapsed_time": "0:23:41", "remaining_time": "0:22:56", "throughput": 2419.43, "total_tokens": 3439776}
1690
+ {"current_steps": 8400, "total_steps": 16520, "loss": 0.1255, "lr": 2.8616327406633175e-05, "epoch": 10.169491525423728, "percentage": 50.85, "elapsed_time": "0:23:42", "remaining_time": "0:22:55", "throughput": 2419.49, "total_tokens": 3441696}
1691
+ {"current_steps": 8405, "total_steps": 16520, "loss": 0.1144, "lr": 2.8590190819534567e-05, "epoch": 10.175544794188863, "percentage": 50.88, "elapsed_time": "0:23:43", "remaining_time": "0:22:54", "throughput": 2419.53, "total_tokens": 3443648}
1692
+ {"current_steps": 8410, "total_steps": 16520, "loss": 0.0864, "lr": 2.8564050225133337e-05, "epoch": 10.181598062953995, "percentage": 50.91, "elapsed_time": "0:23:44", "remaining_time": "0:22:53", "throughput": 2419.52, "total_tokens": 3445536}
1693
+ {"current_steps": 8415, "total_steps": 16520, "loss": 0.1928, "lr": 2.8537905652607122e-05, "epoch": 10.187651331719128, "percentage": 50.94, "elapsed_time": "0:23:44", "remaining_time": "0:22:52", "throughput": 2419.59, "total_tokens": 3447584}
1694
+ {"current_steps": 8420, "total_steps": 16520, "loss": 0.1441, "lr": 2.8511757131137982e-05, "epoch": 10.193704600484262, "percentage": 50.97, "elapsed_time": "0:23:45", "remaining_time": "0:22:51", "throughput": 2419.64, "total_tokens": 3449568}
1695
+ {"current_steps": 8425, "total_steps": 16520, "loss": 0.0949, "lr": 2.848560468991241e-05, "epoch": 10.199757869249394, "percentage": 51.0, "elapsed_time": "0:23:46", "remaining_time": "0:22:50", "throughput": 2419.71, "total_tokens": 3451680}
1696
+ {"current_steps": 8430, "total_steps": 16520, "loss": 0.1078, "lr": 2.8459448358121253e-05, "epoch": 10.205811138014528, "percentage": 51.03, "elapsed_time": "0:23:47", "remaining_time": "0:22:49", "throughput": 2419.75, "total_tokens": 3453696}
1697
+ {"current_steps": 8435, "total_steps": 16520, "loss": 0.068, "lr": 2.843328816495972e-05, "epoch": 10.211864406779661, "percentage": 51.06, "elapsed_time": "0:23:48", "remaining_time": "0:22:48", "throughput": 2419.84, "total_tokens": 3455808}
1698
+ {"current_steps": 8440, "total_steps": 16520, "loss": 0.077, "lr": 2.8407124139627323e-05, "epoch": 10.217917675544793, "percentage": 51.09, "elapsed_time": "0:23:48", "remaining_time": "0:22:47", "throughput": 2419.86, "total_tokens": 3457856}
1699
+ {"current_steps": 8445, "total_steps": 16520, "loss": 0.0767, "lr": 2.838095631132784e-05, "epoch": 10.223970944309928, "percentage": 51.12, "elapsed_time": "0:23:49", "remaining_time": "0:22:47", "throughput": 2419.92, "total_tokens": 3459904}
1700
+ {"current_steps": 8450, "total_steps": 16520, "loss": 0.187, "lr": 2.83547847092693e-05, "epoch": 10.23002421307506, "percentage": 51.15, "elapsed_time": "0:23:50", "remaining_time": "0:22:46", "throughput": 2419.92, "total_tokens": 3461824}
1701
+ {"current_steps": 8455, "total_steps": 16520, "loss": 0.0614, "lr": 2.8328609362663956e-05, "epoch": 10.236077481840194, "percentage": 51.18, "elapsed_time": "0:23:51", "remaining_time": "0:22:45", "throughput": 2419.93, "total_tokens": 3463904}
1702
+ {"current_steps": 8460, "total_steps": 16520, "loss": 0.0711, "lr": 2.8302430300728227e-05, "epoch": 10.242130750605327, "percentage": 51.21, "elapsed_time": "0:23:52", "remaining_time": "0:22:44", "throughput": 2420.05, "total_tokens": 3466048}
1703
+ {"current_steps": 8465, "total_steps": 16520, "loss": 0.0852, "lr": 2.827624755268269e-05, "epoch": 10.24818401937046, "percentage": 51.24, "elapsed_time": "0:23:53", "remaining_time": "0:22:43", "throughput": 2420.14, "total_tokens": 3468128}
1704
+ {"current_steps": 8470, "total_steps": 16520, "loss": 0.0945, "lr": 2.8250061147752015e-05, "epoch": 10.254237288135593, "percentage": 51.27, "elapsed_time": "0:23:53", "remaining_time": "0:22:42", "throughput": 2420.22, "total_tokens": 3470272}
1705
+ {"current_steps": 8475, "total_steps": 16520, "loss": 0.0894, "lr": 2.8223871115164985e-05, "epoch": 10.260290556900726, "percentage": 51.3, "elapsed_time": "0:23:54", "remaining_time": "0:22:41", "throughput": 2420.31, "total_tokens": 3472384}
1706
+ {"current_steps": 8480, "total_steps": 16520, "loss": 0.1102, "lr": 2.8197677484154407e-05, "epoch": 10.26634382566586, "percentage": 51.33, "elapsed_time": "0:23:55", "remaining_time": "0:22:40", "throughput": 2420.4, "total_tokens": 3474336}
1707
+ {"current_steps": 8485, "total_steps": 16520, "loss": 0.0548, "lr": 2.8171480283957118e-05, "epoch": 10.272397094430993, "percentage": 51.36, "elapsed_time": "0:23:56", "remaining_time": "0:22:40", "throughput": 2420.46, "total_tokens": 3476384}
1708
+ {"current_steps": 8490, "total_steps": 16520, "loss": 0.1311, "lr": 2.8145279543813918e-05, "epoch": 10.278450363196125, "percentage": 51.39, "elapsed_time": "0:23:57", "remaining_time": "0:22:39", "throughput": 2420.51, "total_tokens": 3478304}
1709
+ {"current_steps": 8495, "total_steps": 16520, "loss": 0.1061, "lr": 2.8119075292969606e-05, "epoch": 10.28450363196126, "percentage": 51.42, "elapsed_time": "0:23:57", "remaining_time": "0:22:38", "throughput": 2420.55, "total_tokens": 3480384}
1710
+ {"current_steps": 8500, "total_steps": 16520, "loss": 0.2581, "lr": 2.8092867560672836e-05, "epoch": 10.290556900726392, "percentage": 51.45, "elapsed_time": "0:23:58", "remaining_time": "0:22:37", "throughput": 2420.64, "total_tokens": 3482400}
1711
+ {"current_steps": 8505, "total_steps": 16520, "loss": 0.085, "lr": 2.80666563761762e-05, "epoch": 10.296610169491526, "percentage": 51.48, "elapsed_time": "0:23:59", "remaining_time": "0:22:36", "throughput": 2420.68, "total_tokens": 3484480}
1712
+ {"current_steps": 8510, "total_steps": 16520, "loss": 0.1076, "lr": 2.8040441768736104e-05, "epoch": 10.302663438256658, "percentage": 51.51, "elapsed_time": "0:24:00", "remaining_time": "0:22:35", "throughput": 2420.75, "total_tokens": 3486464}
1713
+ {"current_steps": 8515, "total_steps": 16520, "loss": 0.1091, "lr": 2.8014223767612807e-05, "epoch": 10.30871670702179, "percentage": 51.54, "elapsed_time": "0:24:01", "remaining_time": "0:22:34", "throughput": 2420.77, "total_tokens": 3488512}
1714
+ {"current_steps": 8520, "total_steps": 16520, "loss": 0.0664, "lr": 2.7988002402070347e-05, "epoch": 10.314769975786925, "percentage": 51.57, "elapsed_time": "0:24:01", "remaining_time": "0:22:33", "throughput": 2420.77, "total_tokens": 3490464}
1715
+ {"current_steps": 8525, "total_steps": 16520, "loss": 0.1256, "lr": 2.7961777701376502e-05, "epoch": 10.320823244552058, "percentage": 51.6, "elapsed_time": "0:24:02", "remaining_time": "0:22:33", "throughput": 2420.92, "total_tokens": 3492736}
1716
+ {"current_steps": 8530, "total_steps": 16520, "loss": 0.1479, "lr": 2.7935549694802788e-05, "epoch": 10.326876513317192, "percentage": 51.63, "elapsed_time": "0:24:03", "remaining_time": "0:22:32", "throughput": 2420.98, "total_tokens": 3494848}
1717
+ {"current_steps": 8535, "total_steps": 16520, "loss": 0.1109, "lr": 2.790931841162441e-05, "epoch": 10.332929782082324, "percentage": 51.66, "elapsed_time": "0:24:04", "remaining_time": "0:22:31", "throughput": 2421.04, "total_tokens": 3496896}
1718
+ {"current_steps": 8540, "total_steps": 16520, "loss": 0.0901, "lr": 2.7883083881120214e-05, "epoch": 10.338983050847457, "percentage": 51.69, "elapsed_time": "0:24:05", "remaining_time": "0:22:30", "throughput": 2421.1, "total_tokens": 3498944}
1719
+ {"current_steps": 8545, "total_steps": 16520, "loss": 0.1336, "lr": 2.78568461325727e-05, "epoch": 10.345036319612591, "percentage": 51.73, "elapsed_time": "0:24:05", "remaining_time": "0:22:29", "throughput": 2421.16, "total_tokens": 3500992}
1720
+ {"current_steps": 8550, "total_steps": 16520, "loss": 0.1437, "lr": 2.7830605195267933e-05, "epoch": 10.351089588377723, "percentage": 51.76, "elapsed_time": "0:24:06", "remaining_time": "0:22:28", "throughput": 2421.24, "total_tokens": 3503072}
1721
+ {"current_steps": 8555, "total_steps": 16520, "loss": 0.1089, "lr": 2.780436109849555e-05, "epoch": 10.357142857142858, "percentage": 51.79, "elapsed_time": "0:24:07", "remaining_time": "0:22:27", "throughput": 2421.32, "total_tokens": 3505024}
1722
+ {"current_steps": 8560, "total_steps": 16520, "loss": 0.0953, "lr": 2.7778113871548716e-05, "epoch": 10.36319612590799, "percentage": 51.82, "elapsed_time": "0:24:08", "remaining_time": "0:22:26", "throughput": 2421.38, "total_tokens": 3507072}
1723
+ {"current_steps": 8565, "total_steps": 16520, "loss": 0.1111, "lr": 2.7751863543724076e-05, "epoch": 10.369249394673124, "percentage": 51.85, "elapsed_time": "0:24:09", "remaining_time": "0:22:25", "throughput": 2421.42, "total_tokens": 3509152}
1724
+ {"current_steps": 8570, "total_steps": 16520, "loss": 0.072, "lr": 2.772561014432176e-05, "epoch": 10.375302663438257, "percentage": 51.88, "elapsed_time": "0:24:10", "remaining_time": "0:22:25", "throughput": 2421.5, "total_tokens": 3511360}
1725
+ {"current_steps": 8575, "total_steps": 16520, "loss": 0.1264, "lr": 2.7699353702645305e-05, "epoch": 10.38135593220339, "percentage": 51.91, "elapsed_time": "0:24:10", "remaining_time": "0:22:24", "throughput": 2421.6, "total_tokens": 3513408}
1726
+ {"current_steps": 8580, "total_steps": 16520, "loss": 0.129, "lr": 2.7673094248001646e-05, "epoch": 10.387409200968523, "percentage": 51.94, "elapsed_time": "0:24:11", "remaining_time": "0:22:23", "throughput": 2421.66, "total_tokens": 3515328}
1727
+ {"current_steps": 8585, "total_steps": 16520, "loss": 0.1619, "lr": 2.764683180970109e-05, "epoch": 10.393462469733656, "percentage": 51.97, "elapsed_time": "0:24:12", "remaining_time": "0:22:22", "throughput": 2421.67, "total_tokens": 3517376}
1728
+ {"current_steps": 8590, "total_steps": 16520, "loss": 0.1773, "lr": 2.762056641705727e-05, "epoch": 10.39951573849879, "percentage": 52.0, "elapsed_time": "0:24:13", "remaining_time": "0:22:21", "throughput": 2421.75, "total_tokens": 3519584}
1729
+ {"current_steps": 8595, "total_steps": 16520, "loss": 0.1655, "lr": 2.759429809938712e-05, "epoch": 10.405569007263923, "percentage": 52.03, "elapsed_time": "0:24:14", "remaining_time": "0:22:20", "throughput": 2421.81, "total_tokens": 3521632}
1730
+ {"current_steps": 8600, "total_steps": 16520, "loss": 0.1282, "lr": 2.756802688601084e-05, "epoch": 10.411622276029055, "percentage": 52.06, "elapsed_time": "0:24:14", "remaining_time": "0:22:19", "throughput": 2421.93, "total_tokens": 3523840}
1731
+ {"current_steps": 8605, "total_steps": 16520, "loss": 0.1439, "lr": 2.7541752806251848e-05, "epoch": 10.41767554479419, "percentage": 52.09, "elapsed_time": "0:24:15", "remaining_time": "0:22:19", "throughput": 2421.95, "total_tokens": 3525952}
1732
+ {"current_steps": 8610, "total_steps": 16520, "loss": 0.1216, "lr": 2.7515475889436775e-05, "epoch": 10.423728813559322, "percentage": 52.12, "elapsed_time": "0:24:16", "remaining_time": "0:22:18", "throughput": 2422.03, "total_tokens": 3528160}
1733
+ {"current_steps": 8615, "total_steps": 16520, "loss": 0.1401, "lr": 2.748919616489542e-05, "epoch": 10.429782082324456, "percentage": 52.15, "elapsed_time": "0:24:17", "remaining_time": "0:22:17", "throughput": 2422.12, "total_tokens": 3530272}
1734
+ {"current_steps": 8620, "total_steps": 16520, "loss": 0.129, "lr": 2.7462913661960703e-05, "epoch": 10.435835351089588, "percentage": 52.18, "elapsed_time": "0:24:18", "remaining_time": "0:22:16", "throughput": 2422.16, "total_tokens": 3532352}
1735
+ {"current_steps": 8625, "total_steps": 16520, "loss": 0.1421, "lr": 2.7436628409968664e-05, "epoch": 10.441888619854721, "percentage": 52.21, "elapsed_time": "0:24:19", "remaining_time": "0:22:15", "throughput": 2422.22, "total_tokens": 3534336}
1736
+ {"current_steps": 8630, "total_steps": 16520, "loss": 0.1282, "lr": 2.7410340438258388e-05, "epoch": 10.447941888619855, "percentage": 52.24, "elapsed_time": "0:24:19", "remaining_time": "0:22:14", "throughput": 2422.23, "total_tokens": 3536288}
1737
+ {"current_steps": 8635, "total_steps": 16520, "loss": 0.1452, "lr": 2.7384049776172015e-05, "epoch": 10.453995157384988, "percentage": 52.27, "elapsed_time": "0:24:20", "remaining_time": "0:22:13", "throughput": 2422.28, "total_tokens": 3538336}
1738
+ {"current_steps": 8640, "total_steps": 16520, "loss": 0.1124, "lr": 2.7357756453054688e-05, "epoch": 10.460048426150122, "percentage": 52.3, "elapsed_time": "0:24:21", "remaining_time": "0:22:13", "throughput": 2422.3, "total_tokens": 3540448}
1739
+ {"current_steps": 8645, "total_steps": 16520, "loss": 0.0963, "lr": 2.73314604982545e-05, "epoch": 10.466101694915254, "percentage": 52.33, "elapsed_time": "0:24:22", "remaining_time": "0:22:12", "throughput": 2422.32, "total_tokens": 3542432}
1740
+ {"current_steps": 8650, "total_steps": 16520, "loss": 0.1026, "lr": 2.730516194112251e-05, "epoch": 10.472154963680387, "percentage": 52.36, "elapsed_time": "0:24:23", "remaining_time": "0:22:11", "throughput": 2422.34, "total_tokens": 3544352}
1741
+ {"current_steps": 8655, "total_steps": 16520, "loss": 0.0635, "lr": 2.7278860811012652e-05, "epoch": 10.478208232445521, "percentage": 52.39, "elapsed_time": "0:24:23", "remaining_time": "0:22:10", "throughput": 2422.42, "total_tokens": 3546400}
1742
+ {"current_steps": 8660, "total_steps": 16520, "loss": 0.1259, "lr": 2.725255713728176e-05, "epoch": 10.484261501210653, "percentage": 52.42, "elapsed_time": "0:24:24", "remaining_time": "0:22:09", "throughput": 2422.48, "total_tokens": 3548576}
1743
+ {"current_steps": 8665, "total_steps": 16520, "loss": 0.2005, "lr": 2.722625094928948e-05, "epoch": 10.490314769975788, "percentage": 52.45, "elapsed_time": "0:24:25", "remaining_time": "0:22:08", "throughput": 2422.51, "total_tokens": 3550592}
1744
+ {"current_steps": 8670, "total_steps": 16520, "loss": 0.1163, "lr": 2.7199942276398284e-05, "epoch": 10.49636803874092, "percentage": 52.48, "elapsed_time": "0:24:26", "remaining_time": "0:22:07", "throughput": 2422.53, "total_tokens": 3552576}
1745
+ {"current_steps": 8675, "total_steps": 16520, "loss": 0.1439, "lr": 2.7173631147973412e-05, "epoch": 10.502421307506053, "percentage": 52.51, "elapsed_time": "0:24:27", "remaining_time": "0:22:06", "throughput": 2422.57, "total_tokens": 3554592}
1746
+ {"current_steps": 8680, "total_steps": 16520, "loss": 0.1144, "lr": 2.714731759338285e-05, "epoch": 10.508474576271187, "percentage": 52.54, "elapsed_time": "0:24:28", "remaining_time": "0:22:06", "throughput": 2422.6, "total_tokens": 3556672}
1747
+ {"current_steps": 8685, "total_steps": 16520, "loss": 0.0769, "lr": 2.712100164199728e-05, "epoch": 10.51452784503632, "percentage": 52.57, "elapsed_time": "0:24:28", "remaining_time": "0:22:05", "throughput": 2422.69, "total_tokens": 3558784}
1748
+ {"current_steps": 8690, "total_steps": 16520, "loss": 0.0339, "lr": 2.7094683323190063e-05, "epoch": 10.520581113801454, "percentage": 52.6, "elapsed_time": "0:24:29", "remaining_time": "0:22:04", "throughput": 2422.69, "total_tokens": 3560800}
1749
+ {"current_steps": 8695, "total_steps": 16520, "loss": 0.0666, "lr": 2.7068362666337217e-05, "epoch": 10.526634382566586, "percentage": 52.63, "elapsed_time": "0:24:30", "remaining_time": "0:22:03", "throughput": 2422.71, "total_tokens": 3562912}
1750
+ {"current_steps": 8700, "total_steps": 16520, "loss": 0.1151, "lr": 2.7042039700817347e-05, "epoch": 10.532687651331718, "percentage": 52.66, "elapsed_time": "0:24:31", "remaining_time": "0:22:02", "throughput": 2422.74, "total_tokens": 3564960}
1751
+ {"current_steps": 8705, "total_steps": 16520, "loss": 0.1641, "lr": 2.7015714456011647e-05, "epoch": 10.538740920096853, "percentage": 52.69, "elapsed_time": "0:24:32", "remaining_time": "0:22:01", "throughput": 2422.77, "total_tokens": 3566848}
1752
+ {"current_steps": 8710, "total_steps": 16520, "loss": 0.1644, "lr": 2.698938696130386e-05, "epoch": 10.544794188861985, "percentage": 52.72, "elapsed_time": "0:24:33", "remaining_time": "0:22:00", "throughput": 2422.77, "total_tokens": 3568832}
1753
+ {"current_steps": 8715, "total_steps": 16520, "loss": 0.1695, "lr": 2.6963057246080227e-05, "epoch": 10.55084745762712, "percentage": 52.75, "elapsed_time": "0:24:33", "remaining_time": "0:21:59", "throughput": 2422.79, "total_tokens": 3570848}
1754
+ {"current_steps": 8720, "total_steps": 16520, "loss": 0.1199, "lr": 2.693672533972948e-05, "epoch": 10.556900726392252, "percentage": 52.78, "elapsed_time": "0:24:34", "remaining_time": "0:21:59", "throughput": 2422.87, "total_tokens": 3572800}
1755
+ {"current_steps": 8725, "total_steps": 16520, "loss": 0.0854, "lr": 2.6910391271642793e-05, "epoch": 10.562953995157384, "percentage": 52.81, "elapsed_time": "0:24:35", "remaining_time": "0:21:58", "throughput": 2422.92, "total_tokens": 3574912}
1756
+ {"current_steps": 8730, "total_steps": 16520, "loss": 0.08, "lr": 2.6884055071213742e-05, "epoch": 10.569007263922519, "percentage": 52.85, "elapsed_time": "0:24:36", "remaining_time": "0:21:57", "throughput": 2422.48, "total_tokens": 3576896}
1757
+ {"current_steps": 8735, "total_steps": 16520, "loss": 0.1212, "lr": 2.685771676783831e-05, "epoch": 10.575060532687651, "percentage": 52.88, "elapsed_time": "0:24:37", "remaining_time": "0:21:56", "throughput": 2422.55, "total_tokens": 3578976}
1758
+ {"current_steps": 8740, "total_steps": 16520, "loss": 0.15, "lr": 2.6831376390914785e-05, "epoch": 10.581113801452785, "percentage": 52.91, "elapsed_time": "0:24:38", "remaining_time": "0:21:55", "throughput": 2422.63, "total_tokens": 3580928}
1759
+ {"current_steps": 8745, "total_steps": 16520, "loss": 0.0362, "lr": 2.680503396984382e-05, "epoch": 10.587167070217918, "percentage": 52.94, "elapsed_time": "0:24:38", "remaining_time": "0:21:54", "throughput": 2422.63, "total_tokens": 3582976}
1760
+ {"current_steps": 8750, "total_steps": 16520, "loss": 0.123, "lr": 2.6778689534028295e-05, "epoch": 10.59322033898305, "percentage": 52.97, "elapsed_time": "0:24:39", "remaining_time": "0:21:54", "throughput": 2422.7, "total_tokens": 3585184}
1761
+ {"current_steps": 8755, "total_steps": 16520, "loss": 0.139, "lr": 2.6752343112873397e-05, "epoch": 10.599273607748184, "percentage": 53.0, "elapsed_time": "0:24:40", "remaining_time": "0:21:53", "throughput": 2422.73, "total_tokens": 3587200}
1762
+ {"current_steps": 8760, "total_steps": 16520, "loss": 0.0878, "lr": 2.672599473578648e-05, "epoch": 10.605326876513317, "percentage": 53.03, "elapsed_time": "0:24:41", "remaining_time": "0:21:52", "throughput": 2422.78, "total_tokens": 3589312}
1763
+ {"current_steps": 8765, "total_steps": 16520, "loss": 0.0628, "lr": 2.6699644432177112e-05, "epoch": 10.611380145278451, "percentage": 53.06, "elapsed_time": "0:24:42", "remaining_time": "0:21:51", "throughput": 2422.79, "total_tokens": 3591392}
1764
+ {"current_steps": 8770, "total_steps": 16520, "loss": 0.1256, "lr": 2.6673292231456993e-05, "epoch": 10.617433414043584, "percentage": 53.09, "elapsed_time": "0:24:43", "remaining_time": "0:21:50", "throughput": 2422.84, "total_tokens": 3593440}
1765
+ {"current_steps": 8775, "total_steps": 16520, "loss": 0.1232, "lr": 2.6646938163039942e-05, "epoch": 10.623486682808716, "percentage": 53.12, "elapsed_time": "0:24:43", "remaining_time": "0:21:49", "throughput": 2422.92, "total_tokens": 3595328}
1766
+ {"current_steps": 8780, "total_steps": 16520, "loss": 0.0612, "lr": 2.6620582256341885e-05, "epoch": 10.62953995157385, "percentage": 53.15, "elapsed_time": "0:24:44", "remaining_time": "0:21:48", "throughput": 2423.0, "total_tokens": 3597472}
1767
+ {"current_steps": 8785, "total_steps": 16520, "loss": 0.1522, "lr": 2.659422454078077e-05, "epoch": 10.635593220338983, "percentage": 53.18, "elapsed_time": "0:24:45", "remaining_time": "0:21:48", "throughput": 2423.05, "total_tokens": 3599584}
1768
+ {"current_steps": 8790, "total_steps": 16520, "loss": 0.0573, "lr": 2.6567865045776586e-05, "epoch": 10.641646489104117, "percentage": 53.21, "elapsed_time": "0:24:46", "remaining_time": "0:21:47", "throughput": 2423.12, "total_tokens": 3601600}
1769
+ {"current_steps": 8795, "total_steps": 16520, "loss": 0.093, "lr": 2.654150380075129e-05, "epoch": 10.64769975786925, "percentage": 53.24, "elapsed_time": "0:24:47", "remaining_time": "0:21:46", "throughput": 2423.2, "total_tokens": 3603680}
1770
+ {"current_steps": 8800, "total_steps": 16520, "loss": 0.1338, "lr": 2.651514083512881e-05, "epoch": 10.653753026634382, "percentage": 53.27, "elapsed_time": "0:24:47", "remaining_time": "0:21:45", "throughput": 2423.26, "total_tokens": 3605728}
1771
+ {"current_steps": 8805, "total_steps": 16520, "loss": 0.1771, "lr": 2.6488776178334978e-05, "epoch": 10.659806295399516, "percentage": 53.3, "elapsed_time": "0:24:48", "remaining_time": "0:21:44", "throughput": 2423.25, "total_tokens": 3607680}
1772
+ {"current_steps": 8810, "total_steps": 16520, "loss": 0.0786, "lr": 2.646240985979753e-05, "epoch": 10.665859564164649, "percentage": 53.33, "elapsed_time": "0:24:49", "remaining_time": "0:21:43", "throughput": 2423.27, "total_tokens": 3609728}
1773
+ {"current_steps": 8815, "total_steps": 16520, "loss": 0.021, "lr": 2.6436041908946046e-05, "epoch": 10.671912832929783, "percentage": 53.36, "elapsed_time": "0:24:50", "remaining_time": "0:21:42", "throughput": 2423.35, "total_tokens": 3611808}
1774
+ {"current_steps": 8820, "total_steps": 16520, "loss": 0.1634, "lr": 2.6409672355211936e-05, "epoch": 10.677966101694915, "percentage": 53.39, "elapsed_time": "0:24:51", "remaining_time": "0:21:41", "throughput": 2423.38, "total_tokens": 3613696}
1775
+ {"current_steps": 8825, "total_steps": 16520, "loss": 0.1398, "lr": 2.638330122802838e-05, "epoch": 10.684019370460048, "percentage": 53.42, "elapsed_time": "0:24:51", "remaining_time": "0:21:40", "throughput": 2423.46, "total_tokens": 3615776}
1776
+ {"current_steps": 8830, "total_steps": 16520, "loss": 0.082, "lr": 2.635692855683033e-05, "epoch": 10.690072639225182, "percentage": 53.45, "elapsed_time": "0:24:52", "remaining_time": "0:21:40", "throughput": 2423.48, "total_tokens": 3617824}
1777
+ {"current_steps": 8835, "total_steps": 16520, "loss": 0.0726, "lr": 2.6330554371054467e-05, "epoch": 10.696125907990314, "percentage": 53.48, "elapsed_time": "0:24:53", "remaining_time": "0:21:39", "throughput": 2423.49, "total_tokens": 3619808}
1778
+ {"current_steps": 8840, "total_steps": 16520, "loss": 0.1917, "lr": 2.630417870013916e-05, "epoch": 10.702179176755449, "percentage": 53.51, "elapsed_time": "0:24:54", "remaining_time": "0:21:38", "throughput": 2423.65, "total_tokens": 3622176}
1779
+ {"current_steps": 8845, "total_steps": 16520, "loss": 0.0847, "lr": 2.627780157352442e-05, "epoch": 10.708232445520581, "percentage": 53.54, "elapsed_time": "0:24:55", "remaining_time": "0:21:37", "throughput": 2423.7, "total_tokens": 3624192}
1780
+ {"current_steps": 8850, "total_steps": 16520, "loss": 0.0797, "lr": 2.6251423020651893e-05, "epoch": 10.714285714285714, "percentage": 53.57, "elapsed_time": "0:24:56", "remaining_time": "0:21:36", "throughput": 2423.81, "total_tokens": 3626400}
1781
+ {"current_steps": 8855, "total_steps": 16520, "loss": 0.0506, "lr": 2.622504307096482e-05, "epoch": 10.720338983050848, "percentage": 53.6, "elapsed_time": "0:24:56", "remaining_time": "0:21:35", "throughput": 2423.77, "total_tokens": 3628256}
1782
+ {"current_steps": 8860, "total_steps": 16520, "loss": 0.1308, "lr": 2.6198661753908004e-05, "epoch": 10.72639225181598, "percentage": 53.63, "elapsed_time": "0:24:57", "remaining_time": "0:21:34", "throughput": 2423.83, "total_tokens": 3630304}
1783
+ {"current_steps": 8865, "total_steps": 16520, "loss": 0.0879, "lr": 2.6172279098927772e-05, "epoch": 10.732445520581114, "percentage": 53.66, "elapsed_time": "0:24:58", "remaining_time": "0:21:34", "throughput": 2423.85, "total_tokens": 3632352}
1784
+ {"current_steps": 8870, "total_steps": 16520, "loss": 0.1112, "lr": 2.614589513547192e-05, "epoch": 10.738498789346247, "percentage": 53.69, "elapsed_time": "0:24:59", "remaining_time": "0:21:33", "throughput": 2423.91, "total_tokens": 3634336}
1785
+ {"current_steps": 8875, "total_steps": 16520, "loss": 0.1049, "lr": 2.6119509892989747e-05, "epoch": 10.74455205811138, "percentage": 53.72, "elapsed_time": "0:25:00", "remaining_time": "0:21:32", "throughput": 2423.98, "total_tokens": 3636416}
1786
+ {"current_steps": 8880, "total_steps": 16520, "loss": 0.0924, "lr": 2.6093123400931945e-05, "epoch": 10.750605326876514, "percentage": 53.75, "elapsed_time": "0:25:00", "remaining_time": "0:21:31", "throughput": 2424.05, "total_tokens": 3638432}
1787
+ {"current_steps": 8885, "total_steps": 16520, "loss": 0.045, "lr": 2.6066735688750626e-05, "epoch": 10.756658595641646, "percentage": 53.78, "elapsed_time": "0:25:01", "remaining_time": "0:21:30", "throughput": 2424.16, "total_tokens": 3640512}
1788
+ {"current_steps": 8890, "total_steps": 16520, "loss": 0.0672, "lr": 2.604034678589924e-05, "epoch": 10.76271186440678, "percentage": 53.81, "elapsed_time": "0:25:02", "remaining_time": "0:21:29", "throughput": 2424.2, "total_tokens": 3642656}
1789
+ {"current_steps": 8895, "total_steps": 16520, "loss": 0.1711, "lr": 2.6013956721832582e-05, "epoch": 10.768765133171913, "percentage": 53.84, "elapsed_time": "0:25:03", "remaining_time": "0:21:28", "throughput": 2424.24, "total_tokens": 3644672}
1790
+ {"current_steps": 8900, "total_steps": 16520, "loss": 0.1108, "lr": 2.5987565526006748e-05, "epoch": 10.774818401937045, "percentage": 53.87, "elapsed_time": "0:25:04", "remaining_time": "0:21:27", "throughput": 2424.26, "total_tokens": 3646720}
1791
+ {"current_steps": 8905, "total_steps": 16520, "loss": 0.13, "lr": 2.596117322787907e-05, "epoch": 10.78087167070218, "percentage": 53.9, "elapsed_time": "0:25:05", "remaining_time": "0:21:27", "throughput": 2424.36, "total_tokens": 3648768}
1792
+ {"current_steps": 8910, "total_steps": 16520, "loss": 0.1313, "lr": 2.593477985690815e-05, "epoch": 10.786924939467312, "percentage": 53.93, "elapsed_time": "0:25:05", "remaining_time": "0:21:26", "throughput": 2424.4, "total_tokens": 3650848}
1793
+ {"current_steps": 8915, "total_steps": 16520, "loss": 0.204, "lr": 2.590838544255374e-05, "epoch": 10.792978208232446, "percentage": 53.96, "elapsed_time": "0:25:06", "remaining_time": "0:21:25", "throughput": 2424.5, "total_tokens": 3652896}
1794
+ {"current_steps": 8920, "total_steps": 16520, "loss": 0.0738, "lr": 2.5881990014276808e-05, "epoch": 10.799031476997579, "percentage": 54.0, "elapsed_time": "0:25:07", "remaining_time": "0:21:24", "throughput": 2424.56, "total_tokens": 3654944}
1795
+ {"current_steps": 8925, "total_steps": 16520, "loss": 0.1112, "lr": 2.5855593601539412e-05, "epoch": 10.805084745762711, "percentage": 54.03, "elapsed_time": "0:25:08", "remaining_time": "0:21:23", "throughput": 2424.65, "total_tokens": 3657120}
1796
+ {"current_steps": 8930, "total_steps": 16520, "loss": 0.1355, "lr": 2.5829196233804738e-05, "epoch": 10.811138014527845, "percentage": 54.06, "elapsed_time": "0:25:09", "remaining_time": "0:21:22", "throughput": 2424.73, "total_tokens": 3659200}
1797
+ {"current_steps": 8935, "total_steps": 16520, "loss": 0.1103, "lr": 2.5802797940537004e-05, "epoch": 10.817191283292978, "percentage": 54.09, "elapsed_time": "0:25:09", "remaining_time": "0:21:21", "throughput": 2424.76, "total_tokens": 3661216}
1798
+ {"current_steps": 8940, "total_steps": 16520, "loss": 0.0735, "lr": 2.5776398751201507e-05, "epoch": 10.823244552058112, "percentage": 54.12, "elapsed_time": "0:25:10", "remaining_time": "0:21:20", "throughput": 2424.86, "total_tokens": 3663264}
1799
+ {"current_steps": 8945, "total_steps": 16520, "loss": 0.1137, "lr": 2.574999869526451e-05, "epoch": 10.829297820823244, "percentage": 54.15, "elapsed_time": "0:25:11", "remaining_time": "0:21:20", "throughput": 2424.93, "total_tokens": 3665344}
1800
+ {"current_steps": 8950, "total_steps": 16520, "loss": 0.1253, "lr": 2.5723597802193256e-05, "epoch": 10.835351089588377, "percentage": 54.18, "elapsed_time": "0:25:12", "remaining_time": "0:21:19", "throughput": 2424.92, "total_tokens": 3667296}
1801
+ {"current_steps": 8955, "total_steps": 16520, "loss": 0.1477, "lr": 2.56971961014559e-05, "epoch": 10.841404358353511, "percentage": 54.21, "elapsed_time": "0:25:13", "remaining_time": "0:21:18", "throughput": 2424.97, "total_tokens": 3669280}
1802
+ {"current_steps": 8960, "total_steps": 16520, "loss": 0.0936, "lr": 2.5670793622521544e-05, "epoch": 10.847457627118644, "percentage": 54.24, "elapsed_time": "0:25:13", "remaining_time": "0:21:17", "throughput": 2425.05, "total_tokens": 3671360}
1803
+ {"current_steps": 8965, "total_steps": 16520, "loss": 0.2147, "lr": 2.564439039486012e-05, "epoch": 10.853510895883778, "percentage": 54.27, "elapsed_time": "0:25:14", "remaining_time": "0:21:16", "throughput": 2425.1, "total_tokens": 3673344}
1804
+ {"current_steps": 8970, "total_steps": 16520, "loss": 0.1662, "lr": 2.5617986447942406e-05, "epoch": 10.85956416464891, "percentage": 54.3, "elapsed_time": "0:25:15", "remaining_time": "0:21:15", "throughput": 2425.15, "total_tokens": 3675456}
1805
+ {"current_steps": 8975, "total_steps": 16520, "loss": 0.1022, "lr": 2.5591581811239983e-05, "epoch": 10.865617433414045, "percentage": 54.33, "elapsed_time": "0:25:16", "remaining_time": "0:21:14", "throughput": 2425.25, "total_tokens": 3677568}
1806
+ {"current_steps": 8980, "total_steps": 16520, "loss": 0.1247, "lr": 2.5565176514225213e-05, "epoch": 10.871670702179177, "percentage": 54.36, "elapsed_time": "0:25:17", "remaining_time": "0:21:13", "throughput": 2425.25, "total_tokens": 3679520}
1807
+ {"current_steps": 8985, "total_steps": 16520, "loss": 0.1518, "lr": 2.553877058637118e-05, "epoch": 10.87772397094431, "percentage": 54.39, "elapsed_time": "0:25:17", "remaining_time": "0:21:13", "throughput": 2425.32, "total_tokens": 3681600}
1808
+ {"current_steps": 8990, "total_steps": 16520, "loss": 0.0911, "lr": 2.5512364057151676e-05, "epoch": 10.883777239709444, "percentage": 54.42, "elapsed_time": "0:25:18", "remaining_time": "0:21:12", "throughput": 2425.37, "total_tokens": 3683520}
1809
+ {"current_steps": 8995, "total_steps": 16520, "loss": 0.1225, "lr": 2.5485956956041167e-05, "epoch": 10.889830508474576, "percentage": 54.45, "elapsed_time": "0:25:19", "remaining_time": "0:21:11", "throughput": 2425.4, "total_tokens": 3685536}
1810
+ {"current_steps": 9000, "total_steps": 16520, "loss": 0.1581, "lr": 2.5459549312514764e-05, "epoch": 10.89588377723971, "percentage": 54.48, "elapsed_time": "0:25:20", "remaining_time": "0:21:10", "throughput": 2425.44, "total_tokens": 3687552}
1811
+ {"current_steps": 9005, "total_steps": 16520, "loss": 0.1, "lr": 2.5433141156048163e-05, "epoch": 10.901937046004843, "percentage": 54.51, "elapsed_time": "0:25:21", "remaining_time": "0:21:09", "throughput": 2425.55, "total_tokens": 3689632}
1812
+ {"current_steps": 9010, "total_steps": 16520, "loss": 0.1055, "lr": 2.5406732516117655e-05, "epoch": 10.907990314769975, "percentage": 54.54, "elapsed_time": "0:25:21", "remaining_time": "0:21:08", "throughput": 2425.61, "total_tokens": 3691648}
1813
+ {"current_steps": 9015, "total_steps": 16520, "loss": 0.0537, "lr": 2.5380323422200053e-05, "epoch": 10.91404358353511, "percentage": 54.57, "elapsed_time": "0:25:22", "remaining_time": "0:21:07", "throughput": 2425.65, "total_tokens": 3693728}
1814
+ {"current_steps": 9020, "total_steps": 16520, "loss": 0.0968, "lr": 2.5353913903772696e-05, "epoch": 10.920096852300242, "percentage": 54.6, "elapsed_time": "0:25:23", "remaining_time": "0:21:06", "throughput": 2425.71, "total_tokens": 3695840}
1815
+ {"current_steps": 9025, "total_steps": 16520, "loss": 0.0628, "lr": 2.5327503990313377e-05, "epoch": 10.926150121065376, "percentage": 54.63, "elapsed_time": "0:25:24", "remaining_time": "0:21:05", "throughput": 2425.75, "total_tokens": 3697856}
1816
+ {"current_steps": 9030, "total_steps": 16520, "loss": 0.1585, "lr": 2.5301093711300344e-05, "epoch": 10.932203389830509, "percentage": 54.66, "elapsed_time": "0:25:25", "remaining_time": "0:21:05", "throughput": 2425.81, "total_tokens": 3699968}
1817
+ {"current_steps": 9035, "total_steps": 16520, "loss": 0.1043, "lr": 2.5274683096212237e-05, "epoch": 10.938256658595641, "percentage": 54.69, "elapsed_time": "0:25:26", "remaining_time": "0:21:04", "throughput": 2425.89, "total_tokens": 3702080}
1818
+ {"current_steps": 9040, "total_steps": 16520, "loss": 0.214, "lr": 2.5248272174528093e-05, "epoch": 10.944309927360775, "percentage": 54.72, "elapsed_time": "0:25:26", "remaining_time": "0:21:03", "throughput": 2425.92, "total_tokens": 3704032}
1819
+ {"current_steps": 9045, "total_steps": 16520, "loss": 0.2279, "lr": 2.5221860975727275e-05, "epoch": 10.950363196125908, "percentage": 54.75, "elapsed_time": "0:25:27", "remaining_time": "0:21:02", "throughput": 2426.03, "total_tokens": 3706176}
1820
+ {"current_steps": 9050, "total_steps": 16520, "loss": 0.0962, "lr": 2.5195449529289472e-05, "epoch": 10.956416464891042, "percentage": 54.78, "elapsed_time": "0:25:28", "remaining_time": "0:21:01", "throughput": 2426.07, "total_tokens": 3708128}
1821
+ {"current_steps": 9055, "total_steps": 16520, "loss": 0.1387, "lr": 2.516903786469461e-05, "epoch": 10.962469733656174, "percentage": 54.81, "elapsed_time": "0:25:29", "remaining_time": "0:21:00", "throughput": 2426.1, "total_tokens": 3710080}
1822
+ {"current_steps": 9060, "total_steps": 16520, "loss": 0.0869, "lr": 2.514262601142291e-05, "epoch": 10.968523002421307, "percentage": 54.84, "elapsed_time": "0:25:30", "remaining_time": "0:20:59", "throughput": 2426.14, "total_tokens": 3712096}
1823
+ {"current_steps": 9065, "total_steps": 16520, "loss": 0.0582, "lr": 2.5116213998954768e-05, "epoch": 10.974576271186441, "percentage": 54.87, "elapsed_time": "0:25:30", "remaining_time": "0:20:58", "throughput": 2426.19, "total_tokens": 3714208}
1824
+ {"current_steps": 9070, "total_steps": 16520, "loss": 0.1013, "lr": 2.5089801856770778e-05, "epoch": 10.980629539951574, "percentage": 54.9, "elapsed_time": "0:25:31", "remaining_time": "0:20:58", "throughput": 2426.28, "total_tokens": 3716384}
1825
+ {"current_steps": 9075, "total_steps": 16520, "loss": 0.0771, "lr": 2.5063389614351656e-05, "epoch": 10.986682808716708, "percentage": 54.93, "elapsed_time": "0:25:32", "remaining_time": "0:20:57", "throughput": 2426.36, "total_tokens": 3718464}
1826
+ {"current_steps": 9080, "total_steps": 16520, "loss": 0.0744, "lr": 2.5036977301178266e-05, "epoch": 10.99273607748184, "percentage": 54.96, "elapsed_time": "0:25:33", "remaining_time": "0:20:56", "throughput": 2426.41, "total_tokens": 3720448}
1827
+ {"current_steps": 9085, "total_steps": 16520, "loss": 0.0656, "lr": 2.5010564946731512e-05, "epoch": 10.998789346246973, "percentage": 54.99, "elapsed_time": "0:25:34", "remaining_time": "0:20:55", "throughput": 2426.47, "total_tokens": 3722432}
1828
+ {"current_steps": 9086, "total_steps": 16520, "eval_loss": 0.12728272378444672, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:25:39", "remaining_time": "0:20:59", "throughput": 2418.69, "total_tokens": 3722552}
1829
+ {"current_steps": 9090, "total_steps": 16520, "loss": 0.1058, "lr": 2.4984152580492366e-05, "epoch": 11.004842615012107, "percentage": 55.02, "elapsed_time": "0:25:40", "remaining_time": "0:20:59", "throughput": 2417.3, "total_tokens": 3724216}
1830
+ {"current_steps": 9095, "total_steps": 16520, "loss": 0.1287, "lr": 2.49577402319418e-05, "epoch": 11.01089588377724, "percentage": 55.05, "elapsed_time": "0:25:41", "remaining_time": "0:20:58", "throughput": 2417.32, "total_tokens": 3726232}
1831
+ {"current_steps": 9100, "total_steps": 16520, "loss": 0.2131, "lr": 2.4931327930560798e-05, "epoch": 11.016949152542374, "percentage": 55.08, "elapsed_time": "0:25:42", "remaining_time": "0:20:57", "throughput": 2417.35, "total_tokens": 3728280}
1832
+ {"current_steps": 9105, "total_steps": 16520, "loss": 0.0827, "lr": 2.4904915705830238e-05, "epoch": 11.023002421307506, "percentage": 55.12, "elapsed_time": "0:25:43", "remaining_time": "0:20:56", "throughput": 2417.37, "total_tokens": 3730296}
1833
+ {"current_steps": 9110, "total_steps": 16520, "loss": 0.0454, "lr": 2.487850358723097e-05, "epoch": 11.029055690072639, "percentage": 55.15, "elapsed_time": "0:25:43", "remaining_time": "0:20:55", "throughput": 2417.44, "total_tokens": 3732504}
1834
+ {"current_steps": 9115, "total_steps": 16520, "loss": 0.0506, "lr": 2.4852091604243663e-05, "epoch": 11.035108958837773, "percentage": 55.18, "elapsed_time": "0:25:44", "remaining_time": "0:20:55", "throughput": 2417.45, "total_tokens": 3734616}
1835
+ {"current_steps": 9120, "total_steps": 16520, "loss": 0.0896, "lr": 2.482567978634891e-05, "epoch": 11.041162227602905, "percentage": 55.21, "elapsed_time": "0:25:45", "remaining_time": "0:20:54", "throughput": 2417.53, "total_tokens": 3736728}