rbelanec commited on
Commit
b6adc6f
verified
1 Parent(s): 880171a

Training in progress, step 9086

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +167 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25107ac651870b45f9342126777781918e6df6ee9099d8c320cb725995c07650
3
  size 819328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4eb15a611d31c98b95b6f042f87bf79e1991c6437cccb606decebc92d37a4f7
3
  size 819328
trainer_log.jsonl CHANGED
@@ -1662,3 +1662,170 @@
1662
  {"current_steps": 8260, "total_steps": 16520, "eval_loss": 0.2184004932641983, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:27:21", "remaining_time": "0:27:21", "throughput": 2061.55, "total_tokens": 3384288}
1663
  {"current_steps": 8265, "total_steps": 16520, "loss": 0.2331, "lr": 0.01759223640602779, "epoch": 10.006053268765132, "percentage": 50.03, "elapsed_time": "0:27:23", "remaining_time": "0:27:21", "throughput": 2060.54, "total_tokens": 3386304}
1664
  {"current_steps": 8270, "total_steps": 16520, "loss": 0.2535, "lr": 0.017576625979288465, "epoch": 10.012106537530267, "percentage": 50.06, "elapsed_time": "0:27:24", "remaining_time": "0:27:20", "throughput": 2060.59, "total_tokens": 3388224}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1662
  {"current_steps": 8260, "total_steps": 16520, "eval_loss": 0.2184004932641983, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:27:21", "remaining_time": "0:27:21", "throughput": 2061.55, "total_tokens": 3384288}
1663
  {"current_steps": 8265, "total_steps": 16520, "loss": 0.2331, "lr": 0.01759223640602779, "epoch": 10.006053268765132, "percentage": 50.03, "elapsed_time": "0:27:23", "remaining_time": "0:27:21", "throughput": 2060.54, "total_tokens": 3386304}
1664
  {"current_steps": 8270, "total_steps": 16520, "loss": 0.2535, "lr": 0.017576625979288465, "epoch": 10.012106537530267, "percentage": 50.06, "elapsed_time": "0:27:24", "remaining_time": "0:27:20", "throughput": 2060.59, "total_tokens": 3388224}
1665
+ {"current_steps": 8275, "total_steps": 16520, "loss": 0.1575, "lr": 0.017561012676568295, "epoch": 10.0181598062954, "percentage": 50.09, "elapsed_time": "0:27:25", "remaining_time": "0:27:19", "throughput": 2060.62, "total_tokens": 3390240}
1666
+ {"current_steps": 8280, "total_steps": 16520, "loss": 0.1674, "lr": 0.017545396515294544, "epoch": 10.024213075060533, "percentage": 50.12, "elapsed_time": "0:27:26", "remaining_time": "0:27:18", "throughput": 2060.68, "total_tokens": 3392320}
1667
+ {"current_steps": 8285, "total_steps": 16520, "loss": 0.1969, "lr": 0.017529777512897672, "epoch": 10.030266343825666, "percentage": 50.15, "elapsed_time": "0:27:27", "remaining_time": "0:27:17", "throughput": 2060.74, "total_tokens": 3394336}
1668
+ {"current_steps": 8290, "total_steps": 16520, "loss": 0.1942, "lr": 0.017514155686811316, "epoch": 10.036319612590798, "percentage": 50.18, "elapsed_time": "0:27:28", "remaining_time": "0:27:16", "throughput": 2060.77, "total_tokens": 3396288}
1669
+ {"current_steps": 8295, "total_steps": 16520, "loss": 0.1675, "lr": 0.017498531054472258, "epoch": 10.042372881355933, "percentage": 50.21, "elapsed_time": "0:27:29", "remaining_time": "0:27:15", "throughput": 2060.87, "total_tokens": 3398432}
1670
+ {"current_steps": 8300, "total_steps": 16520, "loss": 0.2198, "lr": 0.017482903633320414, "epoch": 10.048426150121065, "percentage": 50.24, "elapsed_time": "0:27:29", "remaining_time": "0:27:13", "throughput": 2060.93, "total_tokens": 3400320}
1671
+ {"current_steps": 8305, "total_steps": 16520, "loss": 0.1938, "lr": 0.017467273440798823, "epoch": 10.0544794188862, "percentage": 50.27, "elapsed_time": "0:27:30", "remaining_time": "0:27:12", "throughput": 2061.0, "total_tokens": 3402432}
1672
+ {"current_steps": 8310, "total_steps": 16520, "loss": 0.1472, "lr": 0.017451640494353597, "epoch": 10.060532687651332, "percentage": 50.3, "elapsed_time": "0:27:31", "remaining_time": "0:27:11", "throughput": 2061.06, "total_tokens": 3404512}
1673
+ {"current_steps": 8315, "total_steps": 16520, "loss": 0.225, "lr": 0.01743600481143394, "epoch": 10.066585956416464, "percentage": 50.33, "elapsed_time": "0:27:32", "remaining_time": "0:27:10", "throughput": 2061.18, "total_tokens": 3406720}
1674
+ {"current_steps": 8320, "total_steps": 16520, "loss": 0.187, "lr": 0.0174203664094921, "epoch": 10.072639225181598, "percentage": 50.36, "elapsed_time": "0:27:33", "remaining_time": "0:27:09", "throughput": 2061.19, "total_tokens": 3408640}
1675
+ {"current_steps": 8325, "total_steps": 16520, "loss": 0.1405, "lr": 0.017404725305983363, "epoch": 10.07869249394673, "percentage": 50.39, "elapsed_time": "0:27:34", "remaining_time": "0:27:08", "throughput": 2061.25, "total_tokens": 3410656}
1676
+ {"current_steps": 8330, "total_steps": 16520, "loss": 0.1752, "lr": 0.017389081518366035, "epoch": 10.084745762711865, "percentage": 50.42, "elapsed_time": "0:27:35", "remaining_time": "0:27:07", "throughput": 2061.3, "total_tokens": 3412704}
1677
+ {"current_steps": 8335, "total_steps": 16520, "loss": 0.1976, "lr": 0.017373435064101408, "epoch": 10.090799031476998, "percentage": 50.45, "elapsed_time": "0:27:36", "remaining_time": "0:27:06", "throughput": 2061.35, "total_tokens": 3414752}
1678
+ {"current_steps": 8340, "total_steps": 16520, "loss": 0.1188, "lr": 0.01735778596065377, "epoch": 10.09685230024213, "percentage": 50.48, "elapsed_time": "0:27:37", "remaining_time": "0:27:05", "throughput": 2061.42, "total_tokens": 3416864}
1679
+ {"current_steps": 8345, "total_steps": 16520, "loss": 0.1978, "lr": 0.01734213422549033, "epoch": 10.102905569007264, "percentage": 50.51, "elapsed_time": "0:27:38", "remaining_time": "0:27:04", "throughput": 2061.44, "total_tokens": 3418848}
1680
+ {"current_steps": 8350, "total_steps": 16520, "loss": 0.1508, "lr": 0.017326479876081276, "epoch": 10.108958837772397, "percentage": 50.54, "elapsed_time": "0:27:39", "remaining_time": "0:27:03", "throughput": 2061.55, "total_tokens": 3421024}
1681
+ {"current_steps": 8355, "total_steps": 16520, "loss": 0.1935, "lr": 0.017310822929899685, "epoch": 10.115012106537531, "percentage": 50.58, "elapsed_time": "0:27:40", "remaining_time": "0:27:02", "throughput": 2061.66, "total_tokens": 3423168}
1682
+ {"current_steps": 8360, "total_steps": 16520, "loss": 0.1476, "lr": 0.017295163404421544, "epoch": 10.121065375302663, "percentage": 50.61, "elapsed_time": "0:27:41", "remaining_time": "0:27:01", "throughput": 2061.79, "total_tokens": 3425344}
1683
+ {"current_steps": 8365, "total_steps": 16520, "loss": 0.1363, "lr": 0.01727950131712572, "epoch": 10.127118644067796, "percentage": 50.64, "elapsed_time": "0:27:42", "remaining_time": "0:27:00", "throughput": 2061.86, "total_tokens": 3427392}
1684
+ {"current_steps": 8370, "total_steps": 16520, "loss": 0.2039, "lr": 0.017263836685493927, "epoch": 10.13317191283293, "percentage": 50.67, "elapsed_time": "0:27:43", "remaining_time": "0:26:59", "throughput": 2061.87, "total_tokens": 3429248}
1685
+ {"current_steps": 8375, "total_steps": 16520, "loss": 0.1981, "lr": 0.017248169527010745, "epoch": 10.139225181598063, "percentage": 50.7, "elapsed_time": "0:27:44", "remaining_time": "0:26:58", "throughput": 2061.95, "total_tokens": 3431296}
1686
+ {"current_steps": 8380, "total_steps": 16520, "loss": 0.1798, "lr": 0.017232499859163544, "epoch": 10.145278450363197, "percentage": 50.73, "elapsed_time": "0:27:45", "remaining_time": "0:26:57", "throughput": 2062.05, "total_tokens": 3433408}
1687
+ {"current_steps": 8385, "total_steps": 16520, "loss": 0.1485, "lr": 0.01721682769944252, "epoch": 10.15133171912833, "percentage": 50.76, "elapsed_time": "0:27:46", "remaining_time": "0:26:56", "throughput": 2062.19, "total_tokens": 3435648}
1688
+ {"current_steps": 8390, "total_steps": 16520, "loss": 0.2268, "lr": 0.017201153065340622, "epoch": 10.157384987893462, "percentage": 50.79, "elapsed_time": "0:27:46", "remaining_time": "0:26:55", "throughput": 2062.25, "total_tokens": 3437600}
1689
+ {"current_steps": 8395, "total_steps": 16520, "loss": 0.1755, "lr": 0.017185475974353596, "epoch": 10.163438256658596, "percentage": 50.82, "elapsed_time": "0:27:47", "remaining_time": "0:26:54", "throughput": 2062.36, "total_tokens": 3439776}
1690
+ {"current_steps": 8400, "total_steps": 16520, "loss": 0.178, "lr": 0.017169796443979903, "epoch": 10.169491525423728, "percentage": 50.85, "elapsed_time": "0:27:48", "remaining_time": "0:26:53", "throughput": 2062.37, "total_tokens": 3441696}
1691
+ {"current_steps": 8405, "total_steps": 16520, "loss": 0.1952, "lr": 0.01715411449172074, "epoch": 10.175544794188863, "percentage": 50.88, "elapsed_time": "0:27:49", "remaining_time": "0:26:52", "throughput": 2062.4, "total_tokens": 3443648}
1692
+ {"current_steps": 8410, "total_steps": 16520, "loss": 0.1956, "lr": 0.017138430135080002, "epoch": 10.181598062953995, "percentage": 50.91, "elapsed_time": "0:27:50", "remaining_time": "0:26:51", "throughput": 2062.43, "total_tokens": 3445536}
1693
+ {"current_steps": 8415, "total_steps": 16520, "loss": 0.1826, "lr": 0.017122743391564273, "epoch": 10.187651331719128, "percentage": 50.94, "elapsed_time": "0:27:51", "remaining_time": "0:26:50", "throughput": 2062.47, "total_tokens": 3447584}
1694
+ {"current_steps": 8420, "total_steps": 16520, "loss": 0.1842, "lr": 0.01710705427868279, "epoch": 10.193704600484262, "percentage": 50.97, "elapsed_time": "0:27:52", "remaining_time": "0:26:48", "throughput": 2062.48, "total_tokens": 3449568}
1695
+ {"current_steps": 8425, "total_steps": 16520, "loss": 0.1954, "lr": 0.017091362813947443, "epoch": 10.199757869249394, "percentage": 51.0, "elapsed_time": "0:27:53", "remaining_time": "0:26:47", "throughput": 2062.56, "total_tokens": 3451680}
1696
+ {"current_steps": 8430, "total_steps": 16520, "loss": 0.2477, "lr": 0.01707566901487275, "epoch": 10.205811138014528, "percentage": 51.03, "elapsed_time": "0:27:54", "remaining_time": "0:26:46", "throughput": 2062.62, "total_tokens": 3453696}
1697
+ {"current_steps": 8435, "total_steps": 16520, "loss": 0.1695, "lr": 0.01705997289897583, "epoch": 10.211864406779661, "percentage": 51.06, "elapsed_time": "0:27:55", "remaining_time": "0:26:45", "throughput": 2062.72, "total_tokens": 3455808}
1698
+ {"current_steps": 8440, "total_steps": 16520, "loss": 0.1655, "lr": 0.017044274483776392, "epoch": 10.217917675544793, "percentage": 51.09, "elapsed_time": "0:27:56", "remaining_time": "0:26:44", "throughput": 2062.8, "total_tokens": 3457856}
1699
+ {"current_steps": 8445, "total_steps": 16520, "loss": 0.1679, "lr": 0.017028573786796702, "epoch": 10.223970944309928, "percentage": 51.12, "elapsed_time": "0:27:57", "remaining_time": "0:26:43", "throughput": 2062.87, "total_tokens": 3459904}
1700
+ {"current_steps": 8450, "total_steps": 16520, "loss": 0.1799, "lr": 0.017012870825561577, "epoch": 10.23002421307506, "percentage": 51.15, "elapsed_time": "0:27:58", "remaining_time": "0:26:42", "throughput": 2062.95, "total_tokens": 3461824}
1701
+ {"current_steps": 8455, "total_steps": 16520, "loss": 0.1592, "lr": 0.01699716561759837, "epoch": 10.236077481840194, "percentage": 51.18, "elapsed_time": "0:27:59", "remaining_time": "0:26:41", "throughput": 2063.01, "total_tokens": 3463904}
1702
+ {"current_steps": 8460, "total_steps": 16520, "loss": 0.1992, "lr": 0.016981458180436934, "epoch": 10.242130750605327, "percentage": 51.21, "elapsed_time": "0:28:00", "remaining_time": "0:26:40", "throughput": 2063.1, "total_tokens": 3466048}
1703
+ {"current_steps": 8465, "total_steps": 16520, "loss": 0.1586, "lr": 0.016965748531609613, "epoch": 10.24818401937046, "percentage": 51.24, "elapsed_time": "0:28:00", "remaining_time": "0:26:39", "throughput": 2063.16, "total_tokens": 3468128}
1704
+ {"current_steps": 8470, "total_steps": 16520, "loss": 0.1989, "lr": 0.016950036688651207, "epoch": 10.254237288135593, "percentage": 51.27, "elapsed_time": "0:28:01", "remaining_time": "0:26:38", "throughput": 2063.25, "total_tokens": 3470272}
1705
+ {"current_steps": 8475, "total_steps": 16520, "loss": 0.222, "lr": 0.01693432266909899, "epoch": 10.260290556900726, "percentage": 51.3, "elapsed_time": "0:28:02", "remaining_time": "0:26:37", "throughput": 2063.32, "total_tokens": 3472384}
1706
+ {"current_steps": 8480, "total_steps": 16520, "loss": 0.1778, "lr": 0.016918606490492642, "epoch": 10.26634382566586, "percentage": 51.33, "elapsed_time": "0:28:03", "remaining_time": "0:26:36", "throughput": 2063.32, "total_tokens": 3474336}
1707
+ {"current_steps": 8485, "total_steps": 16520, "loss": 0.1973, "lr": 0.01690288817037427, "epoch": 10.272397094430993, "percentage": 51.36, "elapsed_time": "0:28:04", "remaining_time": "0:26:35", "throughput": 2063.4, "total_tokens": 3476384}
1708
+ {"current_steps": 8490, "total_steps": 16520, "loss": 0.1943, "lr": 0.01688716772628835, "epoch": 10.278450363196125, "percentage": 51.39, "elapsed_time": "0:28:05", "remaining_time": "0:26:34", "throughput": 2063.41, "total_tokens": 3478304}
1709
+ {"current_steps": 8495, "total_steps": 16520, "loss": 0.1931, "lr": 0.016871445175781763, "epoch": 10.28450363196126, "percentage": 51.42, "elapsed_time": "0:28:06", "remaining_time": "0:26:33", "throughput": 2063.47, "total_tokens": 3480384}
1710
+ {"current_steps": 8500, "total_steps": 16520, "loss": 0.2054, "lr": 0.0168557205364037, "epoch": 10.290556900726392, "percentage": 51.45, "elapsed_time": "0:28:07", "remaining_time": "0:26:32", "throughput": 2063.5, "total_tokens": 3482400}
1711
+ {"current_steps": 8505, "total_steps": 16520, "loss": 0.2243, "lr": 0.016839993825705717, "epoch": 10.296610169491526, "percentage": 51.48, "elapsed_time": "0:28:08", "remaining_time": "0:26:31", "throughput": 2063.55, "total_tokens": 3484480}
1712
+ {"current_steps": 8510, "total_steps": 16520, "loss": 0.1898, "lr": 0.01682426506124166, "epoch": 10.302663438256658, "percentage": 51.51, "elapsed_time": "0:28:09", "remaining_time": "0:26:30", "throughput": 2063.57, "total_tokens": 3486464}
1713
+ {"current_steps": 8515, "total_steps": 16520, "loss": 0.1884, "lr": 0.016808534260567682, "epoch": 10.30871670702179, "percentage": 51.54, "elapsed_time": "0:28:10", "remaining_time": "0:26:29", "throughput": 2063.64, "total_tokens": 3488512}
1714
+ {"current_steps": 8520, "total_steps": 16520, "loss": 0.1621, "lr": 0.016792801441242206, "epoch": 10.314769975786925, "percentage": 51.57, "elapsed_time": "0:28:11", "remaining_time": "0:26:28", "throughput": 2063.67, "total_tokens": 3490464}
1715
+ {"current_steps": 8525, "total_steps": 16520, "loss": 0.1832, "lr": 0.016777066620825898, "epoch": 10.320823244552058, "percentage": 51.6, "elapsed_time": "0:28:12", "remaining_time": "0:26:27", "throughput": 2063.82, "total_tokens": 3492736}
1716
+ {"current_steps": 8530, "total_steps": 16520, "loss": 0.2166, "lr": 0.016761329816881672, "epoch": 10.326876513317192, "percentage": 51.63, "elapsed_time": "0:28:13", "remaining_time": "0:26:26", "throughput": 2063.89, "total_tokens": 3494848}
1717
+ {"current_steps": 8535, "total_steps": 16520, "loss": 0.1976, "lr": 0.016745591046974644, "epoch": 10.332929782082324, "percentage": 51.66, "elapsed_time": "0:28:14", "remaining_time": "0:26:25", "throughput": 2063.97, "total_tokens": 3496896}
1718
+ {"current_steps": 8540, "total_steps": 16520, "loss": 0.1778, "lr": 0.01672985032867213, "epoch": 10.338983050847457, "percentage": 51.69, "elapsed_time": "0:28:15", "remaining_time": "0:26:24", "throughput": 2064.01, "total_tokens": 3498944}
1719
+ {"current_steps": 8545, "total_steps": 16520, "loss": 0.1982, "lr": 0.016714107679543618, "epoch": 10.345036319612591, "percentage": 51.73, "elapsed_time": "0:28:16", "remaining_time": "0:26:22", "throughput": 2064.11, "total_tokens": 3500992}
1720
+ {"current_steps": 8550, "total_steps": 16520, "loss": 0.2367, "lr": 0.016698363117160758, "epoch": 10.351089588377723, "percentage": 51.76, "elapsed_time": "0:28:17", "remaining_time": "0:26:21", "throughput": 2064.17, "total_tokens": 3503072}
1721
+ {"current_steps": 8555, "total_steps": 16520, "loss": 0.2074, "lr": 0.01668261665909733, "epoch": 10.357142857142858, "percentage": 51.79, "elapsed_time": "0:28:18", "remaining_time": "0:26:20", "throughput": 2064.2, "total_tokens": 3505024}
1722
+ {"current_steps": 8560, "total_steps": 16520, "loss": 0.1965, "lr": 0.01666686832292923, "epoch": 10.36319612590799, "percentage": 51.82, "elapsed_time": "0:28:18", "remaining_time": "0:26:19", "throughput": 2064.27, "total_tokens": 3507072}
1723
+ {"current_steps": 8565, "total_steps": 16520, "loss": 0.2403, "lr": 0.016651118126234446, "epoch": 10.369249394673124, "percentage": 51.85, "elapsed_time": "0:28:19", "remaining_time": "0:26:18", "throughput": 2064.33, "total_tokens": 3509152}
1724
+ {"current_steps": 8570, "total_steps": 16520, "loss": 0.2024, "lr": 0.016635366086593054, "epoch": 10.375302663438257, "percentage": 51.88, "elapsed_time": "0:28:20", "remaining_time": "0:26:17", "throughput": 2064.46, "total_tokens": 3511360}
1725
+ {"current_steps": 8575, "total_steps": 16520, "loss": 0.2201, "lr": 0.01661961222158718, "epoch": 10.38135593220339, "percentage": 51.91, "elapsed_time": "0:28:21", "remaining_time": "0:26:16", "throughput": 2064.5, "total_tokens": 3513408}
1726
+ {"current_steps": 8580, "total_steps": 16520, "loss": 0.1411, "lr": 0.016603856548800987, "epoch": 10.387409200968523, "percentage": 51.94, "elapsed_time": "0:28:22", "remaining_time": "0:26:15", "throughput": 2064.51, "total_tokens": 3515328}
1727
+ {"current_steps": 8585, "total_steps": 16520, "loss": 0.2135, "lr": 0.01658809908582065, "epoch": 10.393462469733656, "percentage": 51.97, "elapsed_time": "0:28:23", "remaining_time": "0:26:14", "throughput": 2064.58, "total_tokens": 3517376}
1728
+ {"current_steps": 8590, "total_steps": 16520, "loss": 0.1759, "lr": 0.01657233985023436, "epoch": 10.39951573849879, "percentage": 52.0, "elapsed_time": "0:28:24", "remaining_time": "0:26:13", "throughput": 2064.7, "total_tokens": 3519584}
1729
+ {"current_steps": 8595, "total_steps": 16520, "loss": 0.1744, "lr": 0.01655657885963227, "epoch": 10.405569007263923, "percentage": 52.03, "elapsed_time": "0:28:25", "remaining_time": "0:26:12", "throughput": 2064.75, "total_tokens": 3521632}
1730
+ {"current_steps": 8600, "total_steps": 16520, "loss": 0.1638, "lr": 0.016540816131606503, "epoch": 10.411622276029055, "percentage": 52.06, "elapsed_time": "0:28:26", "remaining_time": "0:26:11", "throughput": 2064.89, "total_tokens": 3523840}
1731
+ {"current_steps": 8605, "total_steps": 16520, "loss": 0.1528, "lr": 0.016525051683751107, "epoch": 10.41767554479419, "percentage": 52.09, "elapsed_time": "0:28:27", "remaining_time": "0:26:10", "throughput": 2064.97, "total_tokens": 3525952}
1732
+ {"current_steps": 8610, "total_steps": 16520, "loss": 0.1982, "lr": 0.016509285533662064, "epoch": 10.423728813559322, "percentage": 52.12, "elapsed_time": "0:28:28", "remaining_time": "0:26:09", "throughput": 2065.09, "total_tokens": 3528160}
1733
+ {"current_steps": 8615, "total_steps": 16520, "loss": 0.1929, "lr": 0.01649351769893725, "epoch": 10.429782082324456, "percentage": 52.15, "elapsed_time": "0:28:29", "remaining_time": "0:26:08", "throughput": 2065.16, "total_tokens": 3530272}
1734
+ {"current_steps": 8620, "total_steps": 16520, "loss": 0.1872, "lr": 0.01647774819717642, "epoch": 10.435835351089588, "percentage": 52.18, "elapsed_time": "0:28:30", "remaining_time": "0:26:07", "throughput": 2065.22, "total_tokens": 3532352}
1735
+ {"current_steps": 8625, "total_steps": 16520, "loss": 0.1709, "lr": 0.016461977045981197, "epoch": 10.441888619854721, "percentage": 52.21, "elapsed_time": "0:28:31", "remaining_time": "0:26:06", "throughput": 2065.23, "total_tokens": 3534336}
1736
+ {"current_steps": 8630, "total_steps": 16520, "loss": 0.1832, "lr": 0.016446204262955032, "epoch": 10.447941888619855, "percentage": 52.24, "elapsed_time": "0:28:32", "remaining_time": "0:26:05", "throughput": 2065.26, "total_tokens": 3536288}
1737
+ {"current_steps": 8635, "total_steps": 16520, "loss": 0.2268, "lr": 0.016430429865703207, "epoch": 10.453995157384988, "percentage": 52.27, "elapsed_time": "0:28:33", "remaining_time": "0:26:04", "throughput": 2065.3, "total_tokens": 3538336}
1738
+ {"current_steps": 8640, "total_steps": 16520, "loss": 0.1924, "lr": 0.016414653871832812, "epoch": 10.460048426150122, "percentage": 52.3, "elapsed_time": "0:28:34", "remaining_time": "0:26:03", "throughput": 2065.38, "total_tokens": 3540448}
1739
+ {"current_steps": 8645, "total_steps": 16520, "loss": 0.1707, "lr": 0.0163988762989527, "epoch": 10.466101694915254, "percentage": 52.33, "elapsed_time": "0:28:35", "remaining_time": "0:26:02", "throughput": 2065.42, "total_tokens": 3542432}
1740
+ {"current_steps": 8650, "total_steps": 16520, "loss": 0.178, "lr": 0.016383097164673503, "epoch": 10.472154963680387, "percentage": 52.36, "elapsed_time": "0:28:36", "remaining_time": "0:26:01", "throughput": 2065.46, "total_tokens": 3544352}
1741
+ {"current_steps": 8655, "total_steps": 16520, "loss": 0.1732, "lr": 0.01636731648660759, "epoch": 10.478208232445521, "percentage": 52.39, "elapsed_time": "0:28:36", "remaining_time": "0:26:00", "throughput": 2065.52, "total_tokens": 3546400}
1742
+ {"current_steps": 8660, "total_steps": 16520, "loss": 0.1924, "lr": 0.016351534282369053, "epoch": 10.484261501210653, "percentage": 52.42, "elapsed_time": "0:28:37", "remaining_time": "0:25:59", "throughput": 2065.62, "total_tokens": 3548576}
1743
+ {"current_steps": 8665, "total_steps": 16520, "loss": 0.1791, "lr": 0.016335750569573686, "epoch": 10.490314769975788, "percentage": 52.45, "elapsed_time": "0:28:38", "remaining_time": "0:25:58", "throughput": 2065.68, "total_tokens": 3550592}
1744
+ {"current_steps": 8670, "total_steps": 16520, "loss": 0.1843, "lr": 0.01631996536583897, "epoch": 10.49636803874092, "percentage": 52.48, "elapsed_time": "0:28:39", "remaining_time": "0:25:57", "throughput": 2065.72, "total_tokens": 3552576}
1745
+ {"current_steps": 8675, "total_steps": 16520, "loss": 0.1762, "lr": 0.016304178688784046, "epoch": 10.502421307506053, "percentage": 52.51, "elapsed_time": "0:28:40", "remaining_time": "0:25:56", "throughput": 2065.75, "total_tokens": 3554592}
1746
+ {"current_steps": 8680, "total_steps": 16520, "loss": 0.1794, "lr": 0.016288390556029708, "epoch": 10.508474576271187, "percentage": 52.54, "elapsed_time": "0:28:41", "remaining_time": "0:25:55", "throughput": 2065.84, "total_tokens": 3556672}
1747
+ {"current_steps": 8685, "total_steps": 16520, "loss": 0.1725, "lr": 0.016272600985198366, "epoch": 10.51452784503632, "percentage": 52.57, "elapsed_time": "0:28:42", "remaining_time": "0:25:54", "throughput": 2065.93, "total_tokens": 3558784}
1748
+ {"current_steps": 8690, "total_steps": 16520, "loss": 0.1348, "lr": 0.016256809993914035, "epoch": 10.520581113801454, "percentage": 52.6, "elapsed_time": "0:28:43", "remaining_time": "0:25:52", "throughput": 2065.99, "total_tokens": 3560800}
1749
+ {"current_steps": 8695, "total_steps": 16520, "loss": 0.188, "lr": 0.01624101759980233, "epoch": 10.526634382566586, "percentage": 52.63, "elapsed_time": "0:28:44", "remaining_time": "0:25:51", "throughput": 2066.06, "total_tokens": 3562912}
1750
+ {"current_steps": 8700, "total_steps": 16520, "loss": 0.1588, "lr": 0.016225223820490405, "epoch": 10.532687651331718, "percentage": 52.66, "elapsed_time": "0:28:45", "remaining_time": "0:25:50", "throughput": 2066.1, "total_tokens": 3564960}
1751
+ {"current_steps": 8705, "total_steps": 16520, "loss": 0.2174, "lr": 0.016209428673606988, "epoch": 10.538740920096853, "percentage": 52.69, "elapsed_time": "0:28:46", "remaining_time": "0:25:49", "throughput": 2066.13, "total_tokens": 3566848}
1752
+ {"current_steps": 8710, "total_steps": 16520, "loss": 0.2252, "lr": 0.016193632176782314, "epoch": 10.544794188861985, "percentage": 52.72, "elapsed_time": "0:28:47", "remaining_time": "0:25:48", "throughput": 2066.2, "total_tokens": 3568832}
1753
+ {"current_steps": 8715, "total_steps": 16520, "loss": 0.2306, "lr": 0.016177834347648135, "epoch": 10.55084745762712, "percentage": 52.75, "elapsed_time": "0:28:48", "remaining_time": "0:25:47", "throughput": 2066.29, "total_tokens": 3570848}
1754
+ {"current_steps": 8720, "total_steps": 16520, "loss": 0.2123, "lr": 0.016162035203837688, "epoch": 10.556900726392252, "percentage": 52.78, "elapsed_time": "0:28:49", "remaining_time": "0:25:46", "throughput": 2066.3, "total_tokens": 3572800}
1755
+ {"current_steps": 8725, "total_steps": 16520, "loss": 0.1982, "lr": 0.016146234762985674, "epoch": 10.562953995157384, "percentage": 52.81, "elapsed_time": "0:28:50", "remaining_time": "0:25:45", "throughput": 2066.37, "total_tokens": 3574912}
1756
+ {"current_steps": 8730, "total_steps": 16520, "loss": 0.2131, "lr": 0.016130433042728244, "epoch": 10.569007263922519, "percentage": 52.85, "elapsed_time": "0:28:50", "remaining_time": "0:25:44", "throughput": 2066.41, "total_tokens": 3576896}
1757
+ {"current_steps": 8735, "total_steps": 16520, "loss": 0.2033, "lr": 0.016114630060702986, "epoch": 10.575060532687651, "percentage": 52.88, "elapsed_time": "0:28:51", "remaining_time": "0:25:43", "throughput": 2066.5, "total_tokens": 3578976}
1758
+ {"current_steps": 8740, "total_steps": 16520, "loss": 0.1812, "lr": 0.01609882583454887, "epoch": 10.581113801452785, "percentage": 52.91, "elapsed_time": "0:28:52", "remaining_time": "0:25:42", "throughput": 2066.52, "total_tokens": 3580928}
1759
+ {"current_steps": 8745, "total_steps": 16520, "loss": 0.1521, "lr": 0.016083020381906288, "epoch": 10.587167070217918, "percentage": 52.94, "elapsed_time": "0:28:53", "remaining_time": "0:25:41", "throughput": 2066.59, "total_tokens": 3582976}
1760
+ {"current_steps": 8750, "total_steps": 16520, "loss": 0.2597, "lr": 0.016067213720416976, "epoch": 10.59322033898305, "percentage": 52.97, "elapsed_time": "0:28:54", "remaining_time": "0:25:40", "throughput": 2066.7, "total_tokens": 3585184}
1761
+ {"current_steps": 8755, "total_steps": 16520, "loss": 0.1964, "lr": 0.016051405867724036, "epoch": 10.599273607748184, "percentage": 53.0, "elapsed_time": "0:28:55", "remaining_time": "0:25:39", "throughput": 2066.79, "total_tokens": 3587200}
1762
+ {"current_steps": 8760, "total_steps": 16520, "loss": 0.1808, "lr": 0.016035596841471888, "epoch": 10.605326876513317, "percentage": 53.03, "elapsed_time": "0:28:56", "remaining_time": "0:25:38", "throughput": 2066.86, "total_tokens": 3589312}
1763
+ {"current_steps": 8765, "total_steps": 16520, "loss": 0.1668, "lr": 0.016019786659306267, "epoch": 10.611380145278451, "percentage": 53.06, "elapsed_time": "0:28:57", "remaining_time": "0:25:37", "throughput": 2066.92, "total_tokens": 3591392}
1764
+ {"current_steps": 8770, "total_steps": 16520, "loss": 0.2246, "lr": 0.016003975338874193, "epoch": 10.617433414043584, "percentage": 53.09, "elapsed_time": "0:28:58", "remaining_time": "0:25:36", "throughput": 2067.02, "total_tokens": 3593440}
1765
+ {"current_steps": 8775, "total_steps": 16520, "loss": 0.2238, "lr": 0.015988162897823965, "epoch": 10.623486682808716, "percentage": 53.12, "elapsed_time": "0:28:59", "remaining_time": "0:25:35", "throughput": 2067.01, "total_tokens": 3595328}
1766
+ {"current_steps": 8780, "total_steps": 16520, "loss": 0.1955, "lr": 0.01597234935380513, "epoch": 10.62953995157385, "percentage": 53.15, "elapsed_time": "0:29:00", "remaining_time": "0:25:34", "throughput": 2067.1, "total_tokens": 3597472}
1767
+ {"current_steps": 8785, "total_steps": 16520, "loss": 0.2249, "lr": 0.015956534724468462, "epoch": 10.635593220338983, "percentage": 53.18, "elapsed_time": "0:29:01", "remaining_time": "0:25:33", "throughput": 2067.19, "total_tokens": 3599584}
1768
+ {"current_steps": 8790, "total_steps": 16520, "loss": 0.2108, "lr": 0.01594071902746595, "epoch": 10.641646489104117, "percentage": 53.21, "elapsed_time": "0:29:02", "remaining_time": "0:25:32", "throughput": 2067.22, "total_tokens": 3601600}
1769
+ {"current_steps": 8795, "total_steps": 16520, "loss": 0.1488, "lr": 0.01592490228045077, "epoch": 10.64769975786925, "percentage": 53.24, "elapsed_time": "0:29:03", "remaining_time": "0:25:31", "throughput": 2067.27, "total_tokens": 3603680}
1770
+ {"current_steps": 8800, "total_steps": 16520, "loss": 0.2267, "lr": 0.015909084501077284, "epoch": 10.653753026634382, "percentage": 53.27, "elapsed_time": "0:29:04", "remaining_time": "0:25:30", "throughput": 2067.31, "total_tokens": 3605728}
1771
+ {"current_steps": 8805, "total_steps": 16520, "loss": 0.216, "lr": 0.015893265707000986, "epoch": 10.659806295399516, "percentage": 53.3, "elapsed_time": "0:29:05", "remaining_time": "0:25:29", "throughput": 2067.34, "total_tokens": 3607680}
1772
+ {"current_steps": 8810, "total_steps": 16520, "loss": 0.1941, "lr": 0.01587744591587852, "epoch": 10.665859564164649, "percentage": 53.33, "elapsed_time": "0:29:06", "remaining_time": "0:25:28", "throughput": 2067.41, "total_tokens": 3609728}
1773
+ {"current_steps": 8815, "total_steps": 16520, "loss": 0.1464, "lr": 0.015861625145367625, "epoch": 10.671912832929783, "percentage": 53.36, "elapsed_time": "0:29:06", "remaining_time": "0:25:26", "throughput": 2067.49, "total_tokens": 3611808}
1774
+ {"current_steps": 8820, "total_steps": 16520, "loss": 0.1879, "lr": 0.01584580341312716, "epoch": 10.677966101694915, "percentage": 53.39, "elapsed_time": "0:29:07", "remaining_time": "0:25:25", "throughput": 2067.52, "total_tokens": 3613696}
1775
+ {"current_steps": 8825, "total_steps": 16520, "loss": 0.1974, "lr": 0.015829980736817025, "epoch": 10.684019370460048, "percentage": 53.42, "elapsed_time": "0:29:08", "remaining_time": "0:25:24", "throughput": 2067.57, "total_tokens": 3615776}
1776
+ {"current_steps": 8830, "total_steps": 16520, "loss": 0.1819, "lr": 0.015814157134098197, "epoch": 10.690072639225182, "percentage": 53.45, "elapsed_time": "0:29:09", "remaining_time": "0:25:23", "throughput": 2067.65, "total_tokens": 3617824}
1777
+ {"current_steps": 8835, "total_steps": 16520, "loss": 0.202, "lr": 0.01579833262263268, "epoch": 10.696125907990314, "percentage": 53.48, "elapsed_time": "0:29:10", "remaining_time": "0:25:22", "throughput": 2067.69, "total_tokens": 3619808}
1778
+ {"current_steps": 8840, "total_steps": 16520, "loss": 0.2026, "lr": 0.015782507220083494, "epoch": 10.702179176755449, "percentage": 53.51, "elapsed_time": "0:29:11", "remaining_time": "0:25:21", "throughput": 2067.86, "total_tokens": 3622176}
1779
+ {"current_steps": 8845, "total_steps": 16520, "loss": 0.1775, "lr": 0.01576668094411465, "epoch": 10.708232445520581, "percentage": 53.54, "elapsed_time": "0:29:12", "remaining_time": "0:25:20", "throughput": 2067.88, "total_tokens": 3624192}
1780
+ {"current_steps": 8850, "total_steps": 16520, "loss": 0.1741, "lr": 0.015750853812391136, "epoch": 10.714285714285714, "percentage": 53.57, "elapsed_time": "0:29:13", "remaining_time": "0:25:19", "throughput": 2067.99, "total_tokens": 3626400}
1781
+ {"current_steps": 8855, "total_steps": 16520, "loss": 0.2135, "lr": 0.015735025842578892, "epoch": 10.720338983050848, "percentage": 53.6, "elapsed_time": "0:29:14", "remaining_time": "0:25:18", "throughput": 2068.02, "total_tokens": 3628256}
1782
+ {"current_steps": 8860, "total_steps": 16520, "loss": 0.1946, "lr": 0.015719197052344803, "epoch": 10.72639225181598, "percentage": 53.63, "elapsed_time": "0:29:15", "remaining_time": "0:25:17", "throughput": 2068.1, "total_tokens": 3630304}
1783
+ {"current_steps": 8865, "total_steps": 16520, "loss": 0.2122, "lr": 0.01570336745935666, "epoch": 10.732445520581114, "percentage": 53.66, "elapsed_time": "0:29:16", "remaining_time": "0:25:16", "throughput": 2068.14, "total_tokens": 3632352}
1784
+ {"current_steps": 8870, "total_steps": 16520, "loss": 0.2023, "lr": 0.015687537081283152, "epoch": 10.738498789346247, "percentage": 53.69, "elapsed_time": "0:29:17", "remaining_time": "0:25:15", "throughput": 2068.15, "total_tokens": 3634336}
1785
+ {"current_steps": 8875, "total_steps": 16520, "loss": 0.1492, "lr": 0.015671705935793846, "epoch": 10.74455205811138, "percentage": 53.72, "elapsed_time": "0:29:18", "remaining_time": "0:25:14", "throughput": 2068.23, "total_tokens": 3636416}
1786
+ {"current_steps": 8880, "total_steps": 16520, "loss": 0.2237, "lr": 0.015655874040559167, "epoch": 10.750605326876514, "percentage": 53.75, "elapsed_time": "0:29:19", "remaining_time": "0:25:13", "throughput": 2068.29, "total_tokens": 3638432}
1787
+ {"current_steps": 8885, "total_steps": 16520, "loss": 0.1393, "lr": 0.015640041413250375, "epoch": 10.756658595641646, "percentage": 53.78, "elapsed_time": "0:29:20", "remaining_time": "0:25:12", "throughput": 2068.34, "total_tokens": 3640512}
1788
+ {"current_steps": 8890, "total_steps": 16520, "loss": 0.2073, "lr": 0.015624208071539542, "epoch": 10.76271186440678, "percentage": 53.81, "elapsed_time": "0:29:21", "remaining_time": "0:25:11", "throughput": 2068.43, "total_tokens": 3642656}
1789
+ {"current_steps": 8895, "total_steps": 16520, "loss": 0.2478, "lr": 0.015608374033099549, "epoch": 10.768765133171913, "percentage": 53.84, "elapsed_time": "0:29:21", "remaining_time": "0:25:10", "throughput": 2068.51, "total_tokens": 3644672}
1790
+ {"current_steps": 8900, "total_steps": 16520, "loss": 0.1717, "lr": 0.015592539315604047, "epoch": 10.774818401937045, "percentage": 53.87, "elapsed_time": "0:29:22", "remaining_time": "0:25:09", "throughput": 2068.56, "total_tokens": 3646720}
1791
+ {"current_steps": 8905, "total_steps": 16520, "loss": 0.1854, "lr": 0.015576703936727441, "epoch": 10.78087167070218, "percentage": 53.9, "elapsed_time": "0:29:23", "remaining_time": "0:25:08", "throughput": 2068.6, "total_tokens": 3648768}
1792
+ {"current_steps": 8910, "total_steps": 16520, "loss": 0.1746, "lr": 0.015560867914144887, "epoch": 10.786924939467312, "percentage": 53.93, "elapsed_time": "0:29:24", "remaining_time": "0:25:07", "throughput": 2068.65, "total_tokens": 3650848}
1793
+ {"current_steps": 8915, "total_steps": 16520, "loss": 0.1945, "lr": 0.015545031265532243, "epoch": 10.792978208232446, "percentage": 53.96, "elapsed_time": "0:29:25", "remaining_time": "0:25:06", "throughput": 2068.69, "total_tokens": 3652896}
1794
+ {"current_steps": 8920, "total_steps": 16520, "loss": 0.1368, "lr": 0.015529194008566083, "epoch": 10.799031476997579, "percentage": 54.0, "elapsed_time": "0:29:26", "remaining_time": "0:25:05", "throughput": 2068.73, "total_tokens": 3654944}
1795
+ {"current_steps": 8925, "total_steps": 16520, "loss": 0.2249, "lr": 0.015513356160923647, "epoch": 10.805084745762711, "percentage": 54.03, "elapsed_time": "0:29:27", "remaining_time": "0:25:04", "throughput": 2068.82, "total_tokens": 3657120}
1796
+ {"current_steps": 8930, "total_steps": 16520, "loss": 0.211, "lr": 0.01549751774028284, "epoch": 10.811138014527845, "percentage": 54.06, "elapsed_time": "0:29:28", "remaining_time": "0:25:03", "throughput": 2068.88, "total_tokens": 3659200}
1797
+ {"current_steps": 8935, "total_steps": 16520, "loss": 0.23, "lr": 0.015481678764322201, "epoch": 10.817191283292978, "percentage": 54.09, "elapsed_time": "0:29:29", "remaining_time": "0:25:02", "throughput": 2068.93, "total_tokens": 3661216}
1798
+ {"current_steps": 8940, "total_steps": 16520, "loss": 0.176, "lr": 0.015465839250720904, "epoch": 10.823244552058112, "percentage": 54.12, "elapsed_time": "0:29:30", "remaining_time": "0:25:01", "throughput": 2068.97, "total_tokens": 3663264}
1799
+ {"current_steps": 8945, "total_steps": 16520, "loss": 0.1902, "lr": 0.015449999217158705, "epoch": 10.829297820823244, "percentage": 54.15, "elapsed_time": "0:29:31", "remaining_time": "0:25:00", "throughput": 2069.03, "total_tokens": 3665344}
1800
+ {"current_steps": 8950, "total_steps": 16520, "loss": 0.2129, "lr": 0.015434158681315951, "epoch": 10.835351089588377, "percentage": 54.18, "elapsed_time": "0:29:32", "remaining_time": "0:24:59", "throughput": 2069.08, "total_tokens": 3667296}
1801
+ {"current_steps": 8955, "total_steps": 16520, "loss": 0.1904, "lr": 0.015418317660873539, "epoch": 10.841404358353511, "percentage": 54.21, "elapsed_time": "0:29:33", "remaining_time": "0:24:58", "throughput": 2069.12, "total_tokens": 3669280}
1802
+ {"current_steps": 8960, "total_steps": 16520, "loss": 0.1838, "lr": 0.015402476173512925, "epoch": 10.847457627118644, "percentage": 54.24, "elapsed_time": "0:29:34", "remaining_time": "0:24:57", "throughput": 2069.17, "total_tokens": 3671360}
1803
+ {"current_steps": 8965, "total_steps": 16520, "loss": 0.1921, "lr": 0.01538663423691607, "epoch": 10.853510895883778, "percentage": 54.27, "elapsed_time": "0:29:35", "remaining_time": "0:24:56", "throughput": 2069.21, "total_tokens": 3673344}
1804
+ {"current_steps": 8970, "total_steps": 16520, "loss": 0.1639, "lr": 0.015370791868765441, "epoch": 10.85956416464891, "percentage": 54.3, "elapsed_time": "0:29:36", "remaining_time": "0:24:55", "throughput": 2069.28, "total_tokens": 3675456}
1805
+ {"current_steps": 8975, "total_steps": 16520, "loss": 0.1595, "lr": 0.015354949086743988, "epoch": 10.865617433414045, "percentage": 54.33, "elapsed_time": "0:29:37", "remaining_time": "0:24:54", "throughput": 2069.34, "total_tokens": 3677568}
1806
+ {"current_steps": 8980, "total_steps": 16520, "loss": 0.1611, "lr": 0.015339105908535128, "epoch": 10.871670702179177, "percentage": 54.36, "elapsed_time": "0:29:38", "remaining_time": "0:24:52", "throughput": 2069.36, "total_tokens": 3679520}
1807
+ {"current_steps": 8985, "total_steps": 16520, "loss": 0.2158, "lr": 0.015323262351822707, "epoch": 10.87772397094431, "percentage": 54.39, "elapsed_time": "0:29:39", "remaining_time": "0:24:51", "throughput": 2069.41, "total_tokens": 3681600}
1808
+ {"current_steps": 8990, "total_steps": 16520, "loss": 0.1931, "lr": 0.015307418434291006, "epoch": 10.883777239709444, "percentage": 54.42, "elapsed_time": "0:29:39", "remaining_time": "0:24:50", "throughput": 2069.45, "total_tokens": 3683520}
1809
+ {"current_steps": 8995, "total_steps": 16520, "loss": 0.2243, "lr": 0.0152915741736247, "epoch": 10.889830508474576, "percentage": 54.45, "elapsed_time": "0:29:40", "remaining_time": "0:24:49", "throughput": 2069.5, "total_tokens": 3685536}
1810
+ {"current_steps": 9000, "total_steps": 16520, "loss": 0.2072, "lr": 0.015275729587508856, "epoch": 10.89588377723971, "percentage": 54.48, "elapsed_time": "0:29:41", "remaining_time": "0:24:48", "throughput": 2069.53, "total_tokens": 3687552}
1811
+ {"current_steps": 9005, "total_steps": 16520, "loss": 0.197, "lr": 0.015259884693628897, "epoch": 10.901937046004843, "percentage": 54.51, "elapsed_time": "0:29:42", "remaining_time": "0:24:47", "throughput": 2069.57, "total_tokens": 3689632}
1812
+ {"current_steps": 9010, "total_steps": 16520, "loss": 0.1395, "lr": 0.015244039509670591, "epoch": 10.907990314769975, "percentage": 54.54, "elapsed_time": "0:29:43", "remaining_time": "0:24:46", "throughput": 2069.62, "total_tokens": 3691648}
1813
+ {"current_steps": 9015, "total_steps": 16520, "loss": 0.2031, "lr": 0.015228194053320031, "epoch": 10.91404358353511, "percentage": 54.57, "elapsed_time": "0:29:44", "remaining_time": "0:24:45", "throughput": 2069.67, "total_tokens": 3693728}
1814
+ {"current_steps": 9020, "total_steps": 16520, "loss": 0.2037, "lr": 0.015212348342263617, "epoch": 10.920096852300242, "percentage": 54.6, "elapsed_time": "0:29:45", "remaining_time": "0:24:44", "throughput": 2069.74, "total_tokens": 3695840}
1815
+ {"current_steps": 9025, "total_steps": 16520, "loss": 0.1432, "lr": 0.015196502394188025, "epoch": 10.926150121065376, "percentage": 54.63, "elapsed_time": "0:29:46", "remaining_time": "0:24:43", "throughput": 2069.77, "total_tokens": 3697856}
1816
+ {"current_steps": 9030, "total_steps": 16520, "loss": 0.1827, "lr": 0.015180656226780205, "epoch": 10.932203389830509, "percentage": 54.66, "elapsed_time": "0:29:47", "remaining_time": "0:24:42", "throughput": 2069.84, "total_tokens": 3699968}
1817
+ {"current_steps": 9035, "total_steps": 16520, "loss": 0.2027, "lr": 0.015164809857727342, "epoch": 10.938256658595641, "percentage": 54.69, "elapsed_time": "0:29:48", "remaining_time": "0:24:41", "throughput": 2069.94, "total_tokens": 3702080}
1818
+ {"current_steps": 9040, "total_steps": 16520, "loss": 0.2117, "lr": 0.015148963304716854, "epoch": 10.944309927360775, "percentage": 54.72, "elapsed_time": "0:29:49", "remaining_time": "0:24:40", "throughput": 2069.96, "total_tokens": 3704032}
1819
+ {"current_steps": 9045, "total_steps": 16520, "loss": 0.2108, "lr": 0.015133116585436363, "epoch": 10.950363196125908, "percentage": 54.75, "elapsed_time": "0:29:50", "remaining_time": "0:24:39", "throughput": 2070.04, "total_tokens": 3706176}
1820
+ {"current_steps": 9050, "total_steps": 16520, "loss": 0.2072, "lr": 0.015117269717573682, "epoch": 10.956416464891042, "percentage": 54.78, "elapsed_time": "0:29:51", "remaining_time": "0:24:38", "throughput": 2070.06, "total_tokens": 3708128}
1821
+ {"current_steps": 9055, "total_steps": 16520, "loss": 0.2045, "lr": 0.015101422718816766, "epoch": 10.962469733656174, "percentage": 54.81, "elapsed_time": "0:29:52", "remaining_time": "0:24:37", "throughput": 2070.08, "total_tokens": 3710080}
1822
+ {"current_steps": 9060, "total_steps": 16520, "loss": 0.1709, "lr": 0.015085575606853745, "epoch": 10.968523002421307, "percentage": 54.84, "elapsed_time": "0:29:53", "remaining_time": "0:24:36", "throughput": 2070.14, "total_tokens": 3712096}
1823
+ {"current_steps": 9065, "total_steps": 16520, "loss": 0.177, "lr": 0.01506972839937286, "epoch": 10.974576271186441, "percentage": 54.87, "elapsed_time": "0:29:54", "remaining_time": "0:24:35", "throughput": 2070.21, "total_tokens": 3714208}
1824
+ {"current_steps": 9070, "total_steps": 16520, "loss": 0.1786, "lr": 0.015053881114062466, "epoch": 10.980629539951574, "percentage": 54.9, "elapsed_time": "0:29:55", "remaining_time": "0:24:34", "throughput": 2070.31, "total_tokens": 3716384}
1825
+ {"current_steps": 9075, "total_steps": 16520, "loss": 0.1694, "lr": 0.015038033768610994, "epoch": 10.986682808716708, "percentage": 54.93, "elapsed_time": "0:29:56", "remaining_time": "0:24:33", "throughput": 2070.36, "total_tokens": 3718464}
1826
+ {"current_steps": 9080, "total_steps": 16520, "loss": 0.1849, "lr": 0.015022186380706959, "epoch": 10.99273607748184, "percentage": 54.96, "elapsed_time": "0:29:57", "remaining_time": "0:24:32", "throughput": 2070.36, "total_tokens": 3720448}
1827
+ {"current_steps": 9085, "total_steps": 16520, "loss": 0.1866, "lr": 0.015006338968038907, "epoch": 10.998789346246973, "percentage": 54.99, "elapsed_time": "0:29:57", "remaining_time": "0:24:31", "throughput": 2070.42, "total_tokens": 3722432}
1828
+ {"current_steps": 9086, "total_steps": 16520, "eval_loss": 0.19022826850414276, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:30:05", "remaining_time": "0:24:37", "throughput": 2061.42, "total_tokens": 3722552}
1829
+ {"current_steps": 9090, "total_steps": 16520, "loss": 0.1503, "lr": 0.014990491548295418, "epoch": 11.004842615012107, "percentage": 55.02, "elapsed_time": "0:30:07", "remaining_time": "0:24:37", "throughput": 2060.47, "total_tokens": 3724216}
1830
+ {"current_steps": 9095, "total_steps": 16520, "loss": 0.2154, "lr": 0.01497464413916508, "epoch": 11.01089588377724, "percentage": 55.05, "elapsed_time": "0:30:08", "remaining_time": "0:24:36", "throughput": 2060.51, "total_tokens": 3726232}
1831
+ {"current_steps": 9100, "total_steps": 16520, "loss": 0.3186, "lr": 0.014958796758336477, "epoch": 11.016949152542374, "percentage": 55.08, "elapsed_time": "0:30:09", "remaining_time": "0:24:35", "throughput": 2060.55, "total_tokens": 3728280}