rbelanec commited on
Commit
8931a14
·
verified ·
1 Parent(s): 50832f7

Training in progress, step 9960

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +198 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d9917681bbf124a37238a7388880d13d20e4b87d27c24533524e60ad2a50a9c
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caeebda46c8c536201990871c0f48980b50cd46eeec65b79f40fa2ff5dd03a72
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -1803,3 +1803,201 @@
1803
  {"current_steps": 8970, "total_steps": 9960, "loss": 0.0, "lr": 2.985474358588658e-07, "epoch": 18.012048192771083, "percentage": 90.06, "elapsed_time": "0:23:23", "remaining_time": "0:02:34", "throughput": 3987.5, "total_tokens": 5598368}
1804
  {"current_steps": 8975, "total_steps": 9960, "loss": 0.0, "lr": 2.9557241234973446e-07, "epoch": 18.022088353413654, "percentage": 90.11, "elapsed_time": "0:23:24", "remaining_time": "0:02:34", "throughput": 3987.62, "total_tokens": 5601664}
1805
  {"current_steps": 8980, "total_steps": 9960, "loss": 0.0, "lr": 2.926118346784379e-07, "epoch": 18.032128514056225, "percentage": 90.16, "elapsed_time": "0:23:25", "remaining_time": "0:02:33", "throughput": 3987.77, "total_tokens": 5604736}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1803
  {"current_steps": 8970, "total_steps": 9960, "loss": 0.0, "lr": 2.985474358588658e-07, "epoch": 18.012048192771083, "percentage": 90.06, "elapsed_time": "0:23:23", "remaining_time": "0:02:34", "throughput": 3987.5, "total_tokens": 5598368}
1804
  {"current_steps": 8975, "total_steps": 9960, "loss": 0.0, "lr": 2.9557241234973446e-07, "epoch": 18.022088353413654, "percentage": 90.11, "elapsed_time": "0:23:24", "remaining_time": "0:02:34", "throughput": 3987.62, "total_tokens": 5601664}
1805
  {"current_steps": 8980, "total_steps": 9960, "loss": 0.0, "lr": 2.926118346784379e-07, "epoch": 18.032128514056225, "percentage": 90.16, "elapsed_time": "0:23:25", "remaining_time": "0:02:33", "throughput": 3987.77, "total_tokens": 5604736}
1806
+ {"current_steps": 8985, "total_steps": 9960, "loss": 0.0, "lr": 2.8966571193599304e-07, "epoch": 18.042168674698797, "percentage": 90.21, "elapsed_time": "0:23:26", "remaining_time": "0:02:32", "throughput": 3987.93, "total_tokens": 5607936}
1807
+ {"current_steps": 8990, "total_steps": 9960, "loss": 0.0157, "lr": 2.8673405316902824e-07, "epoch": 18.052208835341364, "percentage": 90.26, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.03, "total_tokens": 5611200}
1808
+ {"current_steps": 8995, "total_steps": 9960, "loss": 0.0, "lr": 2.8381686737975867e-07, "epoch": 18.062248995983936, "percentage": 90.31, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.13, "total_tokens": 5613856}
1809
+ {"current_steps": 9000, "total_steps": 9960, "loss": 0.0002, "lr": 2.809141635259555e-07, "epoch": 18.072289156626507, "percentage": 90.36, "elapsed_time": "0:23:28", "remaining_time": "0:02:30", "throughput": 3988.26, "total_tokens": 5617152}
1810
+ {"current_steps": 9005, "total_steps": 9960, "loss": 0.0, "lr": 2.780259505209249e-07, "epoch": 18.082329317269075, "percentage": 90.41, "elapsed_time": "0:23:29", "remaining_time": "0:02:29", "throughput": 3988.37, "total_tokens": 5620160}
1811
+ {"current_steps": 9010, "total_steps": 9960, "loss": 0.0, "lr": 2.7515223723346974e-07, "epoch": 18.092369477911646, "percentage": 90.46, "elapsed_time": "0:23:29", "remaining_time": "0:02:28", "throughput": 3988.55, "total_tokens": 5623424}
1812
+ {"current_steps": 9015, "total_steps": 9960, "loss": 0.0, "lr": 2.722930324878748e-07, "epoch": 18.102409638554217, "percentage": 90.51, "elapsed_time": "0:23:30", "remaining_time": "0:02:27", "throughput": 3988.54, "total_tokens": 5626208}
1813
+ {"current_steps": 9020, "total_steps": 9960, "loss": 0.0, "lr": 2.694483450638685e-07, "epoch": 18.11244979919679, "percentage": 90.56, "elapsed_time": "0:23:31", "remaining_time": "0:02:27", "throughput": 3988.7, "total_tokens": 5629280}
1814
+ {"current_steps": 9025, "total_steps": 9960, "loss": 0.0, "lr": 2.666181836966053e-07, "epoch": 18.122489959839356, "percentage": 90.61, "elapsed_time": "0:23:32", "remaining_time": "0:02:26", "throughput": 3988.75, "total_tokens": 5632256}
1815
+ {"current_steps": 9030, "total_steps": 9960, "loss": 0.0002, "lr": 2.6380255707663285e-07, "epoch": 18.132530120481928, "percentage": 90.66, "elapsed_time": "0:23:32", "remaining_time": "0:02:25", "throughput": 3988.76, "total_tokens": 5634688}
1816
+ {"current_steps": 9035, "total_steps": 9960, "loss": 0.0, "lr": 2.610014738498656e-07, "epoch": 18.1425702811245, "percentage": 90.71, "elapsed_time": "0:23:33", "remaining_time": "0:02:24", "throughput": 3988.88, "total_tokens": 5637984}
1817
+ {"current_steps": 9040, "total_steps": 9960, "loss": 0.0, "lr": 2.5821494261756284e-07, "epoch": 18.152610441767067, "percentage": 90.76, "elapsed_time": "0:23:34", "remaining_time": "0:02:23", "throughput": 3989.05, "total_tokens": 5641440}
1818
+ {"current_steps": 9045, "total_steps": 9960, "loss": 0.0529, "lr": 2.554429719362972e-07, "epoch": 18.162650602409638, "percentage": 90.81, "elapsed_time": "0:23:35", "remaining_time": "0:02:23", "throughput": 3989.28, "total_tokens": 5644960}
1819
+ {"current_steps": 9050, "total_steps": 9960, "loss": 0.0, "lr": 2.526855703179304e-07, "epoch": 18.17269076305221, "percentage": 90.86, "elapsed_time": "0:23:35", "remaining_time": "0:02:22", "throughput": 3989.5, "total_tokens": 5648512}
1820
+ {"current_steps": 9055, "total_steps": 9960, "loss": 0.0, "lr": 2.4994274622958726e-07, "epoch": 18.18273092369478, "percentage": 90.91, "elapsed_time": "0:23:36", "remaining_time": "0:02:21", "throughput": 3989.75, "total_tokens": 5651584}
1821
+ {"current_steps": 9060, "total_steps": 9960, "loss": 0.0, "lr": 2.4721450809363054e-07, "epoch": 18.19277108433735, "percentage": 90.96, "elapsed_time": "0:23:37", "remaining_time": "0:02:20", "throughput": 3989.86, "total_tokens": 5654720}
1822
+ {"current_steps": 9065, "total_steps": 9960, "loss": 0.0, "lr": 2.4450086428763345e-07, "epoch": 18.20281124497992, "percentage": 91.01, "elapsed_time": "0:23:38", "remaining_time": "0:02:20", "throughput": 3989.96, "total_tokens": 5657952}
1823
+ {"current_steps": 9070, "total_steps": 9960, "loss": 0.0, "lr": 2.4180182314435305e-07, "epoch": 18.21285140562249, "percentage": 91.06, "elapsed_time": "0:23:38", "remaining_time": "0:02:19", "throughput": 3990.13, "total_tokens": 5661120}
1824
+ {"current_steps": 9075, "total_steps": 9960, "loss": 0.0, "lr": 2.3911739295170875e-07, "epoch": 18.22289156626506, "percentage": 91.11, "elapsed_time": "0:23:39", "remaining_time": "0:02:18", "throughput": 3990.37, "total_tokens": 5664704}
1825
+ {"current_steps": 9080, "total_steps": 9960, "loss": 0.0, "lr": 2.364475819527523e-07, "epoch": 18.23293172690763, "percentage": 91.16, "elapsed_time": "0:23:40", "remaining_time": "0:02:17", "throughput": 3990.52, "total_tokens": 5667744}
1826
+ {"current_steps": 9085, "total_steps": 9960, "loss": 0.0, "lr": 2.3379239834564526e-07, "epoch": 18.2429718875502, "percentage": 91.21, "elapsed_time": "0:23:40", "remaining_time": "0:02:16", "throughput": 3990.61, "total_tokens": 5670496}
1827
+ {"current_steps": 9090, "total_steps": 9960, "loss": 0.0, "lr": 2.3115185028363186e-07, "epoch": 18.253012048192772, "percentage": 91.27, "elapsed_time": "0:23:41", "remaining_time": "0:02:16", "throughput": 3990.78, "total_tokens": 5673632}
1828
+ {"current_steps": 9095, "total_steps": 9960, "loss": 0.0, "lr": 2.2852594587501887e-07, "epoch": 18.26305220883534, "percentage": 91.32, "elapsed_time": "0:23:42", "remaining_time": "0:02:15", "throughput": 3990.88, "total_tokens": 5676672}
1829
+ {"current_steps": 9100, "total_steps": 9960, "loss": 0.0, "lr": 2.259146931831413e-07, "epoch": 18.27309236947791, "percentage": 91.37, "elapsed_time": "0:23:43", "remaining_time": "0:02:14", "throughput": 3991.1, "total_tokens": 5680352}
1830
+ {"current_steps": 9105, "total_steps": 9960, "loss": 0.0, "lr": 2.2331810022634847e-07, "epoch": 18.283132530120483, "percentage": 91.42, "elapsed_time": "0:23:43", "remaining_time": "0:02:13", "throughput": 3991.13, "total_tokens": 5683104}
1831
+ {"current_steps": 9110, "total_steps": 9960, "loss": 0.0001, "lr": 2.2073617497797018e-07, "epoch": 18.29317269076305, "percentage": 91.47, "elapsed_time": "0:23:44", "remaining_time": "0:02:12", "throughput": 3991.27, "total_tokens": 5686688}
1832
+ {"current_steps": 9115, "total_steps": 9960, "loss": 0.0, "lr": 2.1816892536629775e-07, "epoch": 18.303212851405622, "percentage": 91.52, "elapsed_time": "0:23:45", "remaining_time": "0:02:12", "throughput": 3991.36, "total_tokens": 5689600}
1833
+ {"current_steps": 9120, "total_steps": 9960, "loss": 0.0, "lr": 2.1561635927456083e-07, "epoch": 18.313253012048193, "percentage": 91.57, "elapsed_time": "0:23:46", "remaining_time": "0:02:11", "throughput": 3991.53, "total_tokens": 5692768}
1834
+ {"current_steps": 9125, "total_steps": 9960, "loss": 0.0, "lr": 2.1307848454089452e-07, "epoch": 18.323293172690764, "percentage": 91.62, "elapsed_time": "0:23:46", "remaining_time": "0:02:10", "throughput": 3991.63, "total_tokens": 5695584}
1835
+ {"current_steps": 9130, "total_steps": 9960, "loss": 0.0, "lr": 2.1055530895832897e-07, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:23:47", "remaining_time": "0:02:09", "throughput": 3991.76, "total_tokens": 5698784}
1836
+ {"current_steps": 9135, "total_steps": 9960, "loss": 0.0, "lr": 2.0804684027474987e-07, "epoch": 18.343373493975903, "percentage": 91.72, "elapsed_time": "0:23:48", "remaining_time": "0:02:08", "throughput": 3991.81, "total_tokens": 5701504}
1837
+ {"current_steps": 9140, "total_steps": 9960, "loss": 0.0, "lr": 2.055530861928884e-07, "epoch": 18.353413654618475, "percentage": 91.77, "elapsed_time": "0:23:49", "remaining_time": "0:02:08", "throughput": 3992.04, "total_tokens": 5705216}
1838
+ {"current_steps": 9145, "total_steps": 9960, "loss": 0.0001, "lr": 2.0307405437029027e-07, "epoch": 18.363453815261042, "percentage": 91.82, "elapsed_time": "0:23:49", "remaining_time": "0:02:07", "throughput": 3992.25, "total_tokens": 5708576}
1839
+ {"current_steps": 9150, "total_steps": 9960, "loss": 0.0, "lr": 2.006097524192918e-07, "epoch": 18.373493975903614, "percentage": 91.87, "elapsed_time": "0:23:50", "remaining_time": "0:02:06", "throughput": 3992.56, "total_tokens": 5712288}
1840
+ {"current_steps": 9155, "total_steps": 9960, "loss": 0.0, "lr": 1.9816018790700165e-07, "epoch": 18.383534136546185, "percentage": 91.92, "elapsed_time": "0:23:51", "remaining_time": "0:02:05", "throughput": 3992.74, "total_tokens": 5715648}
1841
+ {"current_steps": 9160, "total_steps": 9960, "loss": 0.0, "lr": 1.9572536835527013e-07, "epoch": 18.393574297188756, "percentage": 91.97, "elapsed_time": "0:23:52", "remaining_time": "0:02:05", "throughput": 3992.88, "total_tokens": 5718720}
1842
+ {"current_steps": 9165, "total_steps": 9960, "loss": 0.0, "lr": 1.933053012406749e-07, "epoch": 18.403614457831324, "percentage": 92.02, "elapsed_time": "0:23:53", "remaining_time": "0:02:04", "throughput": 3993.13, "total_tokens": 5722560}
1843
+ {"current_steps": 9170, "total_steps": 9960, "loss": 0.0, "lr": 1.908999939944911e-07, "epoch": 18.413654618473895, "percentage": 92.07, "elapsed_time": "0:23:53", "remaining_time": "0:02:03", "throughput": 3993.18, "total_tokens": 5725408}
1844
+ {"current_steps": 9175, "total_steps": 9960, "loss": 0.0001, "lr": 1.8850945400266994e-07, "epoch": 18.423694779116467, "percentage": 92.12, "elapsed_time": "0:23:54", "remaining_time": "0:02:02", "throughput": 3993.36, "total_tokens": 5729024}
1845
+ {"current_steps": 9180, "total_steps": 9960, "loss": 0.0, "lr": 1.861336886058196e-07, "epoch": 18.433734939759034, "percentage": 92.17, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.46, "total_tokens": 5731584}
1846
+ {"current_steps": 9185, "total_steps": 9960, "loss": 0.0, "lr": 1.8377270509917777e-07, "epoch": 18.443775100401606, "percentage": 92.22, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.58, "total_tokens": 5734624}
1847
+ {"current_steps": 9190, "total_steps": 9960, "loss": 0.0001, "lr": 1.81426510732593e-07, "epoch": 18.453815261044177, "percentage": 92.27, "elapsed_time": "0:23:56", "remaining_time": "0:02:00", "throughput": 3993.73, "total_tokens": 5737920}
1848
+ {"current_steps": 9195, "total_steps": 9960, "loss": 0.0, "lr": 1.7909511271050006e-07, "epoch": 18.46385542168675, "percentage": 92.32, "elapsed_time": "0:23:57", "remaining_time": "0:01:59", "throughput": 3993.81, "total_tokens": 5740896}
1849
+ {"current_steps": 9200, "total_steps": 9960, "loss": 0.0, "lr": 1.7677851819189907e-07, "epoch": 18.473895582329316, "percentage": 92.37, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.94, "total_tokens": 5744000}
1850
+ {"current_steps": 9205, "total_steps": 9960, "loss": 0.0, "lr": 1.7447673429033361e-07, "epoch": 18.483935742971887, "percentage": 92.42, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.92, "total_tokens": 5746816}
1851
+ {"current_steps": 9210, "total_steps": 9960, "loss": 0.0, "lr": 1.7218976807386767e-07, "epoch": 18.49397590361446, "percentage": 92.47, "elapsed_time": "0:23:59", "remaining_time": "0:01:57", "throughput": 3993.97, "total_tokens": 5749696}
1852
+ {"current_steps": 9215, "total_steps": 9960, "loss": 0.0, "lr": 1.6991762656506483e-07, "epoch": 18.50401606425703, "percentage": 92.52, "elapsed_time": "0:24:00", "remaining_time": "0:01:56", "throughput": 3994.08, "total_tokens": 5752544}
1853
+ {"current_steps": 9220, "total_steps": 9960, "loss": 0.0, "lr": 1.6766031674096795e-07, "epoch": 18.514056224899598, "percentage": 92.57, "elapsed_time": "0:24:01", "remaining_time": "0:01:55", "throughput": 3994.38, "total_tokens": 5756672}
1854
+ {"current_steps": 9225, "total_steps": 9960, "loss": 0.0, "lr": 1.654178455330735e-07, "epoch": 18.52409638554217, "percentage": 92.62, "elapsed_time": "0:24:01", "remaining_time": "0:01:54", "throughput": 3994.52, "total_tokens": 5759520}
1855
+ {"current_steps": 9230, "total_steps": 9960, "loss": 0.0, "lr": 1.631902198273172e-07, "epoch": 18.53413654618474, "percentage": 92.67, "elapsed_time": "0:24:02", "remaining_time": "0:01:54", "throughput": 3994.6, "total_tokens": 5762848}
1856
+ {"current_steps": 9235, "total_steps": 9960, "loss": 0.0, "lr": 1.6097744646404457e-07, "epoch": 18.544176706827308, "percentage": 92.72, "elapsed_time": "0:24:03", "remaining_time": "0:01:53", "throughput": 3994.82, "total_tokens": 5766496}
1857
+ {"current_steps": 9240, "total_steps": 9960, "loss": 0.0, "lr": 1.5877953223799703e-07, "epoch": 18.55421686746988, "percentage": 92.77, "elapsed_time": "0:24:04", "remaining_time": "0:01:52", "throughput": 3995.0, "total_tokens": 5769600}
1858
+ {"current_steps": 9245, "total_steps": 9960, "loss": 0.0, "lr": 1.565964838982881e-07, "epoch": 18.56425702811245, "percentage": 92.82, "elapsed_time": "0:24:04", "remaining_time": "0:01:51", "throughput": 3995.14, "total_tokens": 5772800}
1859
+ {"current_steps": 9250, "total_steps": 9960, "loss": 0.0, "lr": 1.544283081483805e-07, "epoch": 18.57429718875502, "percentage": 92.87, "elapsed_time": "0:24:05", "remaining_time": "0:01:50", "throughput": 3995.31, "total_tokens": 5776416}
1860
+ {"current_steps": 9255, "total_steps": 9960, "loss": 0.0, "lr": 1.5227501164607138e-07, "epoch": 18.58433734939759, "percentage": 92.92, "elapsed_time": "0:24:06", "remaining_time": "0:01:50", "throughput": 3995.28, "total_tokens": 5778976}
1861
+ {"current_steps": 9260, "total_steps": 9960, "loss": 0.0, "lr": 1.501366010034644e-07, "epoch": 18.59437751004016, "percentage": 92.97, "elapsed_time": "0:24:07", "remaining_time": "0:01:49", "throughput": 3995.46, "total_tokens": 5782400}
1862
+ {"current_steps": 9265, "total_steps": 9960, "loss": 0.0, "lr": 1.4801308278695636e-07, "epoch": 18.604417670682732, "percentage": 93.02, "elapsed_time": "0:24:07", "remaining_time": "0:01:48", "throughput": 3995.45, "total_tokens": 5784640}
1863
+ {"current_steps": 9270, "total_steps": 9960, "loss": 0.0, "lr": 1.45904463517213e-07, "epoch": 18.6144578313253, "percentage": 93.07, "elapsed_time": "0:24:08", "remaining_time": "0:01:47", "throughput": 3995.67, "total_tokens": 5787936}
1864
+ {"current_steps": 9275, "total_steps": 9960, "loss": 0.0, "lr": 1.4381074966914987e-07, "epoch": 18.62449799196787, "percentage": 93.12, "elapsed_time": "0:24:09", "remaining_time": "0:01:47", "throughput": 3995.85, "total_tokens": 5791584}
1865
+ {"current_steps": 9280, "total_steps": 9960, "loss": 0.0, "lr": 1.4173194767191257e-07, "epoch": 18.634538152610443, "percentage": 93.17, "elapsed_time": "0:24:10", "remaining_time": "0:01:46", "throughput": 3995.96, "total_tokens": 5794912}
1866
+ {"current_steps": 9285, "total_steps": 9960, "loss": 0.0, "lr": 1.396680639088571e-07, "epoch": 18.644578313253014, "percentage": 93.22, "elapsed_time": "0:24:10", "remaining_time": "0:01:45", "throughput": 3996.02, "total_tokens": 5797568}
1867
+ {"current_steps": 9290, "total_steps": 9960, "loss": 0.0, "lr": 1.3761910471753126e-07, "epoch": 18.65461847389558, "percentage": 93.27, "elapsed_time": "0:24:11", "remaining_time": "0:01:44", "throughput": 3996.19, "total_tokens": 5801088}
1868
+ {"current_steps": 9295, "total_steps": 9960, "loss": 0.0, "lr": 1.3558507638965158e-07, "epoch": 18.664658634538153, "percentage": 93.32, "elapsed_time": "0:24:12", "remaining_time": "0:01:43", "throughput": 3996.27, "total_tokens": 5804096}
1869
+ {"current_steps": 9300, "total_steps": 9960, "loss": 0.0001, "lr": 1.3356598517108966e-07, "epoch": 18.674698795180724, "percentage": 93.37, "elapsed_time": "0:24:13", "remaining_time": "0:01:43", "throughput": 3996.45, "total_tokens": 5807392}
1870
+ {"current_steps": 9305, "total_steps": 9960, "loss": 0.0, "lr": 1.3156183726184657e-07, "epoch": 18.684738955823292, "percentage": 93.42, "elapsed_time": "0:24:13", "remaining_time": "0:01:42", "throughput": 3996.63, "total_tokens": 5810848}
1871
+ {"current_steps": 9310, "total_steps": 9960, "loss": 0.0, "lr": 1.295726388160412e-07, "epoch": 18.694779116465863, "percentage": 93.47, "elapsed_time": "0:24:14", "remaining_time": "0:01:41", "throughput": 3996.79, "total_tokens": 5814176}
1872
+ {"current_steps": 9315, "total_steps": 9960, "loss": 0.0, "lr": 1.2759839594188307e-07, "epoch": 18.704819277108435, "percentage": 93.52, "elapsed_time": "0:24:15", "remaining_time": "0:01:40", "throughput": 3996.91, "total_tokens": 5816736}
1873
+ {"current_steps": 9320, "total_steps": 9960, "loss": 0.0, "lr": 1.2563911470166057e-07, "epoch": 18.714859437751002, "percentage": 93.57, "elapsed_time": "0:24:15", "remaining_time": "0:01:39", "throughput": 3996.89, "total_tokens": 5819360}
1874
+ {"current_steps": 9325, "total_steps": 9960, "loss": 0.0, "lr": 1.2369480111171784e-07, "epoch": 18.724899598393574, "percentage": 93.62, "elapsed_time": "0:24:16", "remaining_time": "0:01:39", "throughput": 3997.0, "total_tokens": 5822304}
1875
+ {"current_steps": 9330, "total_steps": 9960, "loss": 0.0, "lr": 1.2176546114243903e-07, "epoch": 18.734939759036145, "percentage": 93.67, "elapsed_time": "0:24:17", "remaining_time": "0:01:38", "throughput": 3997.1, "total_tokens": 5824768}
1876
+ {"current_steps": 9335, "total_steps": 9960, "loss": 0.0, "lr": 1.198511007182296e-07, "epoch": 18.744979919678716, "percentage": 93.72, "elapsed_time": "0:24:17", "remaining_time": "0:01:37", "throughput": 3997.15, "total_tokens": 5827488}
1877
+ {"current_steps": 9340, "total_steps": 9960, "loss": 0.0, "lr": 1.1795172571749503e-07, "epoch": 18.755020080321284, "percentage": 93.78, "elapsed_time": "0:24:18", "remaining_time": "0:01:36", "throughput": 3997.21, "total_tokens": 5830496}
1878
+ {"current_steps": 9345, "total_steps": 9960, "loss": 0.0, "lr": 1.160673419726288e-07, "epoch": 18.765060240963855, "percentage": 93.83, "elapsed_time": "0:24:19", "remaining_time": "0:01:36", "throughput": 3997.4, "total_tokens": 5833952}
1879
+ {"current_steps": 9350, "total_steps": 9960, "loss": 0.0, "lr": 1.1419795526998679e-07, "epoch": 18.775100401606426, "percentage": 93.88, "elapsed_time": "0:24:20", "remaining_time": "0:01:35", "throughput": 3997.59, "total_tokens": 5837280}
1880
+ {"current_steps": 9355, "total_steps": 9960, "loss": 0.0, "lr": 1.1234357134987717e-07, "epoch": 18.785140562248998, "percentage": 93.93, "elapsed_time": "0:24:20", "remaining_time": "0:01:34", "throughput": 3997.62, "total_tokens": 5839936}
1881
+ {"current_steps": 9360, "total_steps": 9960, "loss": 0.0002, "lr": 1.1050419590653726e-07, "epoch": 18.795180722891565, "percentage": 93.98, "elapsed_time": "0:24:21", "remaining_time": "0:01:33", "throughput": 3997.79, "total_tokens": 5843584}
1882
+ {"current_steps": 9365, "total_steps": 9960, "loss": 0.0, "lr": 1.0867983458811792e-07, "epoch": 18.805220883534137, "percentage": 94.03, "elapsed_time": "0:24:22", "remaining_time": "0:01:32", "throughput": 3997.88, "total_tokens": 5846624}
1883
+ {"current_steps": 9370, "total_steps": 9960, "loss": 0.0, "lr": 1.0687049299666796e-07, "epoch": 18.815261044176708, "percentage": 94.08, "elapsed_time": "0:24:23", "remaining_time": "0:01:32", "throughput": 3998.09, "total_tokens": 5850112}
1884
+ {"current_steps": 9375, "total_steps": 9960, "loss": 0.0, "lr": 1.050761766881131e-07, "epoch": 18.825301204819276, "percentage": 94.13, "elapsed_time": "0:24:24", "remaining_time": "0:01:31", "throughput": 3998.33, "total_tokens": 5853856}
1885
+ {"current_steps": 9380, "total_steps": 9960, "loss": 0.0, "lr": 1.0329689117224262e-07, "epoch": 18.835341365461847, "percentage": 94.18, "elapsed_time": "0:24:24", "remaining_time": "0:01:30", "throughput": 3998.53, "total_tokens": 5857024}
1886
+ {"current_steps": 9385, "total_steps": 9960, "loss": 0.0, "lr": 1.0153264191269052e-07, "epoch": 18.84538152610442, "percentage": 94.23, "elapsed_time": "0:24:25", "remaining_time": "0:01:29", "throughput": 3998.64, "total_tokens": 5860128}
1887
+ {"current_steps": 9390, "total_steps": 9960, "loss": 0.0, "lr": 9.978343432691884e-08, "epoch": 18.855421686746986, "percentage": 94.28, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.68, "total_tokens": 5862336}
1888
+ {"current_steps": 9395, "total_steps": 9960, "loss": 0.0, "lr": 9.804927378620155e-08, "epoch": 18.865461847389557, "percentage": 94.33, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.95, "total_tokens": 5865792}
1889
+ {"current_steps": 9400, "total_steps": 9960, "loss": 0.0, "lr": 9.633016561560793e-08, "epoch": 18.87550200803213, "percentage": 94.38, "elapsed_time": "0:24:27", "remaining_time": "0:01:27", "throughput": 3999.23, "total_tokens": 5869280}
1890
+ {"current_steps": 9405, "total_steps": 9960, "loss": 0.0, "lr": 9.462611509398534e-08, "epoch": 18.8855421686747, "percentage": 94.43, "elapsed_time": "0:24:28", "remaining_time": "0:01:26", "throughput": 3999.36, "total_tokens": 5872288}
1891
+ {"current_steps": 9410, "total_steps": 9960, "loss": 0.0001, "lr": 9.293712745394479e-08, "epoch": 18.895582329317268, "percentage": 94.48, "elapsed_time": "0:24:28", "remaining_time": "0:01:25", "throughput": 3999.33, "total_tokens": 5874688}
1892
+ {"current_steps": 9415, "total_steps": 9960, "loss": 0.0, "lr": 9.126320788184374e-08, "epoch": 18.90562248995984, "percentage": 94.53, "elapsed_time": "0:24:29", "remaining_time": "0:01:25", "throughput": 3999.47, "total_tokens": 5877824}
1893
+ {"current_steps": 9420, "total_steps": 9960, "loss": 0.0, "lr": 8.960436151776886e-08, "epoch": 18.91566265060241, "percentage": 94.58, "elapsed_time": "0:24:30", "remaining_time": "0:01:24", "throughput": 3999.61, "total_tokens": 5881056}
1894
+ {"current_steps": 9425, "total_steps": 9960, "loss": 0.0, "lr": 8.796059345552389e-08, "epoch": 18.92570281124498, "percentage": 94.63, "elapsed_time": "0:24:31", "remaining_time": "0:01:23", "throughput": 3999.81, "total_tokens": 5884320}
1895
+ {"current_steps": 9430, "total_steps": 9960, "loss": 0.0, "lr": 8.633190874261011e-08, "epoch": 18.93574297188755, "percentage": 94.68, "elapsed_time": "0:24:31", "remaining_time": "0:01:22", "throughput": 3999.93, "total_tokens": 5887648}
1896
+ {"current_steps": 9435, "total_steps": 9960, "loss": 0.0, "lr": 8.471831238021366e-08, "epoch": 18.94578313253012, "percentage": 94.73, "elapsed_time": "0:24:32", "remaining_time": "0:01:21", "throughput": 4000.09, "total_tokens": 5890976}
1897
+ {"current_steps": 9440, "total_steps": 9960, "loss": 0.0, "lr": 8.31198093231872e-08, "epoch": 18.955823293172692, "percentage": 94.78, "elapsed_time": "0:24:33", "remaining_time": "0:01:21", "throughput": 4000.12, "total_tokens": 5893344}
1898
+ {"current_steps": 9445, "total_steps": 9960, "loss": 0.0, "lr": 8.153640448003875e-08, "epoch": 18.96586345381526, "percentage": 94.83, "elapsed_time": "0:24:33", "remaining_time": "0:01:20", "throughput": 4000.16, "total_tokens": 5895808}
1899
+ {"current_steps": 9450, "total_steps": 9960, "loss": 0.0, "lr": 7.996810271291344e-08, "epoch": 18.97590361445783, "percentage": 94.88, "elapsed_time": "0:24:34", "remaining_time": "0:01:19", "throughput": 4000.23, "total_tokens": 5899200}
1900
+ {"current_steps": 9455, "total_steps": 9960, "loss": 0.0, "lr": 7.841490883757907e-08, "epoch": 18.985943775100402, "percentage": 94.93, "elapsed_time": "0:24:35", "remaining_time": "0:01:18", "throughput": 4000.37, "total_tokens": 5902336}
1901
+ {"current_steps": 9460, "total_steps": 9960, "loss": 0.0, "lr": 7.687682762341276e-08, "epoch": 18.99598393574297, "percentage": 94.98, "elapsed_time": "0:24:36", "remaining_time": "0:01:18", "throughput": 4000.51, "total_tokens": 5905248}
1902
+ {"current_steps": 9465, "total_steps": 9960, "loss": 0.0, "lr": 7.535386379338371e-08, "epoch": 19.00602409638554, "percentage": 95.03, "elapsed_time": "0:24:37", "remaining_time": "0:01:17", "throughput": 4000.41, "total_tokens": 5908704}
1903
+ {"current_steps": 9470, "total_steps": 9960, "loss": 0.0, "lr": 7.384602202404335e-08, "epoch": 19.016064257028113, "percentage": 95.08, "elapsed_time": "0:24:37", "remaining_time": "0:01:16", "throughput": 4000.65, "total_tokens": 5912832}
1904
+ {"current_steps": 9475, "total_steps": 9960, "loss": 0.0, "lr": 7.235330694550402e-08, "epoch": 19.026104417670684, "percentage": 95.13, "elapsed_time": "0:24:38", "remaining_time": "0:01:15", "throughput": 4000.94, "total_tokens": 5917056}
1905
+ {"current_steps": 9480, "total_steps": 9960, "loss": 0.0, "lr": 7.087572314143198e-08, "epoch": 19.03614457831325, "percentage": 95.18, "elapsed_time": "0:24:39", "remaining_time": "0:01:14", "throughput": 4001.1, "total_tokens": 5920192}
1906
+ {"current_steps": 9485, "total_steps": 9960, "loss": 0.0059, "lr": 6.94132751490284e-08, "epoch": 19.046184738955823, "percentage": 95.23, "elapsed_time": "0:24:40", "remaining_time": "0:01:14", "throughput": 4001.11, "total_tokens": 5922368}
1907
+ {"current_steps": 9490, "total_steps": 9960, "loss": 0.0, "lr": 6.796596745901717e-08, "epoch": 19.056224899598394, "percentage": 95.28, "elapsed_time": "0:24:40", "remaining_time": "0:01:13", "throughput": 4001.16, "total_tokens": 5925056}
1908
+ {"current_steps": 9495, "total_steps": 9960, "loss": 0.0, "lr": 6.653380451563219e-08, "epoch": 19.066265060240966, "percentage": 95.33, "elapsed_time": "0:24:41", "remaining_time": "0:01:12", "throughput": 4001.22, "total_tokens": 5928256}
1909
+ {"current_steps": 9500, "total_steps": 9960, "loss": 0.0, "lr": 6.511679071659949e-08, "epoch": 19.076305220883533, "percentage": 95.38, "elapsed_time": "0:24:42", "remaining_time": "0:01:11", "throughput": 4001.34, "total_tokens": 5931392}
1910
+ {"current_steps": 9505, "total_steps": 9960, "loss": 0.0, "lr": 6.371493041313126e-08, "epoch": 19.086345381526105, "percentage": 95.43, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.49, "total_tokens": 5934464}
1911
+ {"current_steps": 9510, "total_steps": 9960, "loss": 0.0, "lr": 6.232822790990467e-08, "epoch": 19.096385542168676, "percentage": 95.48, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.63, "total_tokens": 5937568}
1912
+ {"current_steps": 9515, "total_steps": 9960, "loss": 0.0, "lr": 6.095668746505245e-08, "epoch": 19.106425702811244, "percentage": 95.53, "elapsed_time": "0:24:44", "remaining_time": "0:01:09", "throughput": 4001.8, "total_tokens": 5940768}
1913
+ {"current_steps": 9520, "total_steps": 9960, "loss": 0.0001, "lr": 5.96003132901507e-08, "epoch": 19.116465863453815, "percentage": 95.58, "elapsed_time": "0:24:45", "remaining_time": "0:01:08", "throughput": 4001.91, "total_tokens": 5944032}
1914
+ {"current_steps": 9525, "total_steps": 9960, "loss": 0.0001, "lr": 5.825910955020386e-08, "epoch": 19.126506024096386, "percentage": 95.63, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.05, "total_tokens": 5947168}
1915
+ {"current_steps": 9530, "total_steps": 9960, "loss": 0.0, "lr": 5.693308036363143e-08, "epoch": 19.136546184738958, "percentage": 95.68, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.2, "total_tokens": 5950592}
1916
+ {"current_steps": 9535, "total_steps": 9960, "loss": 0.0, "lr": 5.562222980225907e-08, "epoch": 19.146586345381525, "percentage": 95.73, "elapsed_time": "0:24:47", "remaining_time": "0:01:06", "throughput": 4002.26, "total_tokens": 5952960}
1917
+ {"current_steps": 9540, "total_steps": 9960, "loss": 0.0, "lr": 5.432656189130137e-08, "epoch": 19.156626506024097, "percentage": 95.78, "elapsed_time": "0:24:48", "remaining_time": "0:01:05", "throughput": 4002.45, "total_tokens": 5956288}
1918
+ {"current_steps": 9545, "total_steps": 9960, "loss": 0.0, "lr": 5.3046080609352455e-08, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:24:48", "remaining_time": "0:01:04", "throughput": 4002.47, "total_tokens": 5958752}
1919
+ {"current_steps": 9550, "total_steps": 9960, "loss": 0.0, "lr": 5.178078988837432e-08, "epoch": 19.176706827309236, "percentage": 95.88, "elapsed_time": "0:24:49", "remaining_time": "0:01:03", "throughput": 4002.57, "total_tokens": 5962144}
1920
+ {"current_steps": 9555, "total_steps": 9960, "loss": 0.0, "lr": 5.053069361368068e-08, "epoch": 19.186746987951807, "percentage": 95.93, "elapsed_time": "0:24:50", "remaining_time": "0:01:03", "throughput": 4002.78, "total_tokens": 5965280}
1921
+ {"current_steps": 9560, "total_steps": 9960, "loss": 0.0, "lr": 4.9295795623930945e-08, "epoch": 19.196787148594378, "percentage": 95.98, "elapsed_time": "0:24:50", "remaining_time": "0:01:02", "throughput": 4002.86, "total_tokens": 5968192}
1922
+ {"current_steps": 9565, "total_steps": 9960, "loss": 0.0, "lr": 4.807609971111238e-08, "epoch": 19.20682730923695, "percentage": 96.03, "elapsed_time": "0:24:51", "remaining_time": "0:01:01", "throughput": 4002.97, "total_tokens": 5971264}
1923
+ {"current_steps": 9570, "total_steps": 9960, "loss": 0.0, "lr": 4.68716096205335e-08, "epoch": 19.216867469879517, "percentage": 96.08, "elapsed_time": "0:24:52", "remaining_time": "0:01:00", "throughput": 4002.96, "total_tokens": 5973344}
1924
+ {"current_steps": 9575, "total_steps": 9960, "loss": 0.0, "lr": 4.5682329050810715e-08, "epoch": 19.22690763052209, "percentage": 96.13, "elapsed_time": "0:24:53", "remaining_time": "0:01:00", "throughput": 4003.19, "total_tokens": 5977248}
1925
+ {"current_steps": 9580, "total_steps": 9960, "loss": 0.0, "lr": 4.450826165385336e-08, "epoch": 19.23694779116466, "percentage": 96.18, "elapsed_time": "0:24:53", "remaining_time": "0:00:59", "throughput": 4003.37, "total_tokens": 5980704}
1926
+ {"current_steps": 9585, "total_steps": 9960, "loss": 0.0, "lr": 4.33494110348609e-08, "epoch": 19.246987951807228, "percentage": 96.23, "elapsed_time": "0:24:54", "remaining_time": "0:00:58", "throughput": 4003.53, "total_tokens": 5983936}
1927
+ {"current_steps": 9590, "total_steps": 9960, "loss": 0.0, "lr": 4.2205780752301865e-08, "epoch": 19.2570281124498, "percentage": 96.29, "elapsed_time": "0:24:55", "remaining_time": "0:00:57", "throughput": 4003.7, "total_tokens": 5987424}
1928
+ {"current_steps": 9595, "total_steps": 9960, "loss": 0.0, "lr": 4.107737431791159e-08, "epoch": 19.26706827309237, "percentage": 96.34, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.68, "total_tokens": 5990112}
1929
+ {"current_steps": 9600, "total_steps": 9960, "loss": 0.0, "lr": 3.996419519667505e-08, "epoch": 19.27710843373494, "percentage": 96.39, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.81, "total_tokens": 5993472}
1930
+ {"current_steps": 9605, "total_steps": 9960, "loss": 0.0, "lr": 3.8866246806821273e-08, "epoch": 19.28714859437751, "percentage": 96.44, "elapsed_time": "0:24:57", "remaining_time": "0:00:55", "throughput": 4003.84, "total_tokens": 5996320}
1931
+ {"current_steps": 9610, "total_steps": 9960, "loss": 0.0, "lr": 3.7783532519808376e-08, "epoch": 19.29718875502008, "percentage": 96.49, "elapsed_time": "0:24:58", "remaining_time": "0:00:54", "throughput": 4003.92, "total_tokens": 5999360}
1932
+ {"current_steps": 9615, "total_steps": 9960, "loss": 0.0, "lr": 3.671605566031633e-08, "epoch": 19.30722891566265, "percentage": 96.54, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.93, "total_tokens": 6002016}
1933
+ {"current_steps": 9620, "total_steps": 9960, "loss": 0.0, "lr": 3.566381950623588e-08, "epoch": 19.31726907630522, "percentage": 96.59, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.95, "total_tokens": 6004448}
1934
+ {"current_steps": 9625, "total_steps": 9960, "loss": 0.0, "lr": 3.462682728865685e-08, "epoch": 19.32730923694779, "percentage": 96.64, "elapsed_time": "0:25:00", "remaining_time": "0:00:52", "throughput": 4004.09, "total_tokens": 6007392}
1935
+ {"current_steps": 9630, "total_steps": 9960, "loss": 0.0, "lr": 3.3605082191860985e-08, "epoch": 19.337349397590362, "percentage": 96.69, "elapsed_time": "0:25:00", "remaining_time": "0:00:51", "throughput": 4004.16, "total_tokens": 6010176}
1936
+ {"current_steps": 9635, "total_steps": 9960, "loss": 0.0, "lr": 3.259858735331134e-08, "epoch": 19.347389558232933, "percentage": 96.74, "elapsed_time": "0:25:01", "remaining_time": "0:00:50", "throughput": 4004.33, "total_tokens": 6013120}
1937
+ {"current_steps": 9640, "total_steps": 9960, "loss": 0.0, "lr": 3.1607345863640114e-08, "epoch": 19.3574297188755, "percentage": 96.79, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.35, "total_tokens": 6015296}
1938
+ {"current_steps": 9645, "total_steps": 9960, "loss": 0.0001, "lr": 3.063136076664364e-08, "epoch": 19.367469879518072, "percentage": 96.84, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.39, "total_tokens": 6018016}
1939
+ {"current_steps": 9650, "total_steps": 9960, "loss": 0.0474, "lr": 2.967063505926848e-08, "epoch": 19.377510040160644, "percentage": 96.89, "elapsed_time": "0:25:03", "remaining_time": "0:00:48", "throughput": 4004.49, "total_tokens": 6021408}
1940
+ {"current_steps": 9655, "total_steps": 9960, "loss": 0.0, "lr": 2.8725171691605934e-08, "epoch": 19.38755020080321, "percentage": 96.94, "elapsed_time": "0:25:04", "remaining_time": "0:00:47", "throughput": 4004.63, "total_tokens": 6024544}
1941
+ {"current_steps": 9660, "total_steps": 9960, "loss": 0.0, "lr": 2.7794973566880323e-08, "epoch": 19.397590361445783, "percentage": 96.99, "elapsed_time": "0:25:05", "remaining_time": "0:00:46", "throughput": 4004.83, "total_tokens": 6027872}
1942
+ {"current_steps": 9665, "total_steps": 9960, "loss": 0.0, "lr": 2.6880043541441804e-08, "epoch": 19.407630522088354, "percentage": 97.04, "elapsed_time": "0:25:05", "remaining_time": "0:00:45", "throughput": 4004.92, "total_tokens": 6030784}
1943
+ {"current_steps": 9670, "total_steps": 9960, "loss": 0.0, "lr": 2.5980384424756366e-08, "epoch": 19.417670682730925, "percentage": 97.09, "elapsed_time": "0:25:06", "remaining_time": "0:00:45", "throughput": 4005.08, "total_tokens": 6034208}
1944
+ {"current_steps": 9675, "total_steps": 9960, "loss": 0.0001, "lr": 2.5095998979398628e-08, "epoch": 19.427710843373493, "percentage": 97.14, "elapsed_time": "0:25:07", "remaining_time": "0:00:44", "throughput": 4005.25, "total_tokens": 6037632}
1945
+ {"current_steps": 9680, "total_steps": 9960, "loss": 0.0, "lr": 2.4226889921041273e-08, "epoch": 19.437751004016064, "percentage": 97.19, "elapsed_time": "0:25:08", "remaining_time": "0:00:43", "throughput": 4005.45, "total_tokens": 6040928}
1946
+ {"current_steps": 9685, "total_steps": 9960, "loss": 0.0, "lr": 2.3373059918448958e-08, "epoch": 19.447791164658636, "percentage": 97.24, "elapsed_time": "0:25:08", "remaining_time": "0:00:42", "throughput": 4005.52, "total_tokens": 6044096}
1947
+ {"current_steps": 9690, "total_steps": 9960, "loss": 0.0, "lr": 2.2534511593468866e-08, "epoch": 19.457831325301203, "percentage": 97.29, "elapsed_time": "0:25:09", "remaining_time": "0:00:42", "throughput": 4005.71, "total_tokens": 6047456}
1948
+ {"current_steps": 9695, "total_steps": 9960, "loss": 0.0, "lr": 2.171124752102238e-08, "epoch": 19.467871485943775, "percentage": 97.34, "elapsed_time": "0:25:10", "remaining_time": "0:00:41", "throughput": 4005.72, "total_tokens": 6049856}
1949
+ {"current_steps": 9700, "total_steps": 9960, "loss": 0.0, "lr": 2.0903270229098992e-08, "epoch": 19.477911646586346, "percentage": 97.39, "elapsed_time": "0:25:10", "remaining_time": "0:00:40", "throughput": 4005.83, "total_tokens": 6052704}
1950
+ {"current_steps": 9705, "total_steps": 9960, "loss": 0.0, "lr": 2.0110582198745177e-08, "epoch": 19.487951807228917, "percentage": 97.44, "elapsed_time": "0:25:11", "remaining_time": "0:00:39", "throughput": 4005.95, "total_tokens": 6055456}
1951
+ {"current_steps": 9710, "total_steps": 9960, "loss": 0.0, "lr": 1.9333185864061077e-08, "epoch": 19.497991967871485, "percentage": 97.49, "elapsed_time": "0:25:12", "remaining_time": "0:00:38", "throughput": 4006.01, "total_tokens": 6058304}
1952
+ {"current_steps": 9715, "total_steps": 9960, "loss": 0.0, "lr": 1.8571083612188845e-08, "epoch": 19.508032128514056, "percentage": 97.54, "elapsed_time": "0:25:13", "remaining_time": "0:00:38", "throughput": 4006.09, "total_tokens": 6061440}
1953
+ {"current_steps": 9720, "total_steps": 9960, "loss": 0.0, "lr": 1.7824277783308197e-08, "epoch": 19.518072289156628, "percentage": 97.59, "elapsed_time": "0:25:13", "remaining_time": "0:00:37", "throughput": 4006.27, "total_tokens": 6065024}
1954
+ {"current_steps": 9725, "total_steps": 9960, "loss": 0.0, "lr": 1.7092770670628644e-08, "epoch": 19.528112449799195, "percentage": 97.64, "elapsed_time": "0:25:14", "remaining_time": "0:00:36", "throughput": 4006.44, "total_tokens": 6068256}
1955
+ {"current_steps": 9730, "total_steps": 9960, "loss": 0.0, "lr": 1.637656452038172e-08, "epoch": 19.538152610441767, "percentage": 97.69, "elapsed_time": "0:25:15", "remaining_time": "0:00:35", "throughput": 4006.6, "total_tokens": 6071200}
1956
+ {"current_steps": 9735, "total_steps": 9960, "loss": 0.0, "lr": 1.5675661531813215e-08, "epoch": 19.548192771084338, "percentage": 97.74, "elapsed_time": "0:25:16", "remaining_time": "0:00:35", "throughput": 4006.7, "total_tokens": 6074656}
1957
+ {"current_steps": 9740, "total_steps": 9960, "loss": 0.0, "lr": 1.4990063857180383e-08, "epoch": 19.55823293172691, "percentage": 97.79, "elapsed_time": "0:25:16", "remaining_time": "0:00:34", "throughput": 4006.74, "total_tokens": 6077408}
1958
+ {"current_steps": 9745, "total_steps": 9960, "loss": 0.0, "lr": 1.431977360173975e-08, "epoch": 19.568273092369477, "percentage": 97.84, "elapsed_time": "0:25:17", "remaining_time": "0:00:33", "throughput": 4006.82, "total_tokens": 6080352}
1959
+ {"current_steps": 9750, "total_steps": 9960, "loss": 0.0, "lr": 1.3664792823745442e-08, "epoch": 19.57831325301205, "percentage": 97.89, "elapsed_time": "0:25:18", "remaining_time": "0:00:32", "throughput": 4006.86, "total_tokens": 6082848}
1960
+ {"current_steps": 9755, "total_steps": 9960, "loss": 0.0, "lr": 1.3025123534440299e-08, "epoch": 19.58835341365462, "percentage": 97.94, "elapsed_time": "0:25:18", "remaining_time": "0:00:31", "throughput": 4006.99, "total_tokens": 6085664}
1961
+ {"current_steps": 9760, "total_steps": 9960, "loss": 0.0, "lr": 1.240076769804921e-08, "epoch": 19.598393574297187, "percentage": 97.99, "elapsed_time": "0:25:19", "remaining_time": "0:00:31", "throughput": 4007.02, "total_tokens": 6088608}
1962
+ {"current_steps": 9765, "total_steps": 9960, "loss": 0.0, "lr": 1.1791727231776906e-08, "epoch": 19.60843373493976, "percentage": 98.04, "elapsed_time": "0:25:20", "remaining_time": "0:00:30", "throughput": 4007.05, "total_tokens": 6091296}
1963
+ {"current_steps": 9770, "total_steps": 9960, "loss": 0.0, "lr": 1.1198004005796847e-08, "epoch": 19.61847389558233, "percentage": 98.09, "elapsed_time": "0:25:20", "remaining_time": "0:00:29", "throughput": 4007.18, "total_tokens": 6094880}
1964
+ {"current_steps": 9775, "total_steps": 9960, "loss": 0.0, "lr": 1.0619599843249006e-08, "epoch": 19.6285140562249, "percentage": 98.14, "elapsed_time": "0:25:21", "remaining_time": "0:00:28", "throughput": 4007.33, "total_tokens": 6098208}
1965
+ {"current_steps": 9780, "total_steps": 9960, "loss": 0.0, "lr": 1.0056516520232651e-08, "epoch": 19.63855421686747, "percentage": 98.19, "elapsed_time": "0:25:22", "remaining_time": "0:00:28", "throughput": 4007.36, "total_tokens": 6101024}
1966
+ {"current_steps": 9785, "total_steps": 9960, "loss": 0.0, "lr": 9.508755765802457e-09, "epoch": 19.64859437751004, "percentage": 98.24, "elapsed_time": "0:25:23", "remaining_time": "0:00:27", "throughput": 4007.44, "total_tokens": 6103904}
1967
+ {"current_steps": 9790, "total_steps": 9960, "loss": 0.0, "lr": 8.976319261962407e-09, "epoch": 19.65863453815261, "percentage": 98.29, "elapsed_time": "0:25:23", "remaining_time": "0:00:26", "throughput": 4007.57, "total_tokens": 6106816}
1968
+ {"current_steps": 9795, "total_steps": 9960, "loss": 0.0, "lr": 8.459208643659122e-09, "epoch": 19.66867469879518, "percentage": 98.34, "elapsed_time": "0:25:24", "remaining_time": "0:00:25", "throughput": 4007.72, "total_tokens": 6110368}
1969
+ {"current_steps": 9800, "total_steps": 9960, "loss": 0.0, "lr": 7.957425498778537e-09, "epoch": 19.67871485943775, "percentage": 98.39, "elapsed_time": "0:25:25", "remaining_time": "0:00:24", "throughput": 4007.88, "total_tokens": 6113856}
1970
+ {"current_steps": 9805, "total_steps": 9960, "loss": 0.0, "lr": 7.470971368142011e-09, "epoch": 19.688755020080322, "percentage": 98.44, "elapsed_time": "0:25:26", "remaining_time": "0:00:24", "throughput": 4007.94, "total_tokens": 6116448}
1971
+ {"current_steps": 9810, "total_steps": 9960, "loss": 0.0, "lr": 6.999847745498556e-09, "epoch": 19.698795180722893, "percentage": 98.49, "elapsed_time": "0:25:26", "remaining_time": "0:00:23", "throughput": 4008.19, "total_tokens": 6120096}
1972
+ {"current_steps": 9815, "total_steps": 9960, "loss": 0.0, "lr": 6.544056077523175e-09, "epoch": 19.70883534136546, "percentage": 98.54, "elapsed_time": "0:25:27", "remaining_time": "0:00:22", "throughput": 4008.25, "total_tokens": 6123008}
1973
+ {"current_steps": 9820, "total_steps": 9960, "loss": 0.0, "lr": 6.1035977638101985e-09, "epoch": 19.718875502008032, "percentage": 98.59, "elapsed_time": "0:25:28", "remaining_time": "0:00:21", "throughput": 4008.44, "total_tokens": 6126720}
1974
+ {"current_steps": 9825, "total_steps": 9960, "loss": 0.0, "lr": 5.678474156871061e-09, "epoch": 19.728915662650603, "percentage": 98.64, "elapsed_time": "0:25:29", "remaining_time": "0:00:21", "throughput": 4008.51, "total_tokens": 6129760}
1975
+ {"current_steps": 9830, "total_steps": 9960, "loss": 0.0, "lr": 5.268686562127645e-09, "epoch": 19.73895582329317, "percentage": 98.69, "elapsed_time": "0:25:30", "remaining_time": "0:00:20", "throughput": 4008.67, "total_tokens": 6133344}
1976
+ {"current_steps": 9835, "total_steps": 9960, "loss": 0.0, "lr": 4.874236237911723e-09, "epoch": 19.748995983935743, "percentage": 98.74, "elapsed_time": "0:25:30", "remaining_time": "0:00:19", "throughput": 4008.84, "total_tokens": 6136576}
1977
+ {"current_steps": 9840, "total_steps": 9960, "loss": 0.0, "lr": 4.495124395456629e-09, "epoch": 19.759036144578314, "percentage": 98.8, "elapsed_time": "0:25:31", "remaining_time": "0:00:18", "throughput": 4008.86, "total_tokens": 6139136}
1978
+ {"current_steps": 9845, "total_steps": 9960, "loss": 0.0, "lr": 4.1313521988983754e-09, "epoch": 19.769076305220885, "percentage": 98.85, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4008.98, "total_tokens": 6142240}
1979
+ {"current_steps": 9850, "total_steps": 9960, "loss": 0.0, "lr": 3.7829207652673175e-09, "epoch": 19.779116465863453, "percentage": 98.9, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4009.04, "total_tokens": 6145088}
1980
+ {"current_steps": 9855, "total_steps": 9960, "loss": 0.0, "lr": 3.44983116448927e-09, "epoch": 19.789156626506024, "percentage": 98.95, "elapsed_time": "0:25:33", "remaining_time": "0:00:16", "throughput": 4009.11, "total_tokens": 6147968}
1981
+ {"current_steps": 9860, "total_steps": 9960, "loss": 0.0, "lr": 3.1320844193788445e-09, "epoch": 19.799196787148595, "percentage": 99.0, "elapsed_time": "0:25:34", "remaining_time": "0:00:15", "throughput": 4009.28, "total_tokens": 6151296}
1982
+ {"current_steps": 9865, "total_steps": 9960, "loss": 0.0, "lr": 2.8296815056377824e-09, "epoch": 19.809236947791163, "percentage": 99.05, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.54, "total_tokens": 6154880}
1983
+ {"current_steps": 9870, "total_steps": 9960, "loss": 0.0, "lr": 2.54262335185107e-09, "epoch": 19.819277108433734, "percentage": 99.1, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.68, "total_tokens": 6158528}
1984
+ {"current_steps": 9875, "total_steps": 9960, "loss": 0.0, "lr": 2.2709108394863845e-09, "epoch": 19.829317269076306, "percentage": 99.15, "elapsed_time": "0:25:36", "remaining_time": "0:00:13", "throughput": 4009.81, "total_tokens": 6161600}
1985
+ {"current_steps": 9880, "total_steps": 9960, "loss": 0.0002, "lr": 2.0145448028874305e-09, "epoch": 19.839357429718877, "percentage": 99.2, "elapsed_time": "0:25:37", "remaining_time": "0:00:12", "throughput": 4009.85, "total_tokens": 6164288}
1986
+ {"current_steps": 9885, "total_steps": 9960, "loss": 0.0, "lr": 1.7735260292750522e-09, "epoch": 19.849397590361445, "percentage": 99.25, "elapsed_time": "0:25:38", "remaining_time": "0:00:11", "throughput": 4010.07, "total_tokens": 6167904}
1987
+ {"current_steps": 9890, "total_steps": 9960, "loss": 0.0, "lr": 1.547855258743347e-09, "epoch": 19.859437751004016, "percentage": 99.3, "elapsed_time": "0:25:38", "remaining_time": "0:00:10", "throughput": 4010.23, "total_tokens": 6171360}
1988
+ {"current_steps": 9895, "total_steps": 9960, "loss": 0.0, "lr": 1.3375331842574446e-09, "epoch": 19.869477911646587, "percentage": 99.35, "elapsed_time": "0:25:39", "remaining_time": "0:00:10", "throughput": 4010.4, "total_tokens": 6174848}
1989
+ {"current_steps": 9900, "total_steps": 9960, "loss": 0.0, "lr": 1.1425604516512868e-09, "epoch": 19.879518072289155, "percentage": 99.4, "elapsed_time": "0:25:40", "remaining_time": "0:00:09", "throughput": 4010.34, "total_tokens": 6177472}
1990
+ {"current_steps": 9905, "total_steps": 9960, "loss": 0.0, "lr": 9.629376596248518e-10, "epoch": 19.889558232931726, "percentage": 99.45, "elapsed_time": "0:25:41", "remaining_time": "0:00:08", "throughput": 4010.51, "total_tokens": 6180576}
1991
+ {"current_steps": 9910, "total_steps": 9960, "loss": 0.0, "lr": 7.986653597447102e-10, "epoch": 19.899598393574298, "percentage": 99.5, "elapsed_time": "0:25:41", "remaining_time": "0:00:07", "throughput": 4010.6, "total_tokens": 6183520}
1992
+ {"current_steps": 9915, "total_steps": 9960, "loss": 0.0, "lr": 6.497440564395829e-10, "epoch": 19.90963855421687, "percentage": 99.55, "elapsed_time": "0:25:42", "remaining_time": "0:00:07", "throughput": 4010.98, "total_tokens": 6188000}
1993
+ {"current_steps": 9920, "total_steps": 9960, "loss": 0.0, "lr": 5.161742070014519e-10, "epoch": 19.919678714859437, "percentage": 99.6, "elapsed_time": "0:25:43", "remaining_time": "0:00:06", "throughput": 4011.0, "total_tokens": 6190560}
1994
+ {"current_steps": 9925, "total_steps": 9960, "loss": 0.0, "lr": 3.9795622158111945e-10, "epoch": 19.929718875502008, "percentage": 99.65, "elapsed_time": "0:25:44", "remaining_time": "0:00:05", "throughput": 4011.2, "total_tokens": 6194080}
1995
+ {"current_steps": 9930, "total_steps": 9960, "loss": 0.0, "lr": 2.950904631893181e-10, "epoch": 19.93975903614458, "percentage": 99.7, "elapsed_time": "0:25:45", "remaining_time": "0:00:04", "throughput": 4011.29, "total_tokens": 6197728}
1996
+ {"current_steps": 9935, "total_steps": 9960, "loss": 0.0, "lr": 2.0757724769560062e-10, "epoch": 19.949799196787147, "percentage": 99.75, "elapsed_time": "0:25:45", "remaining_time": "0:00:03", "throughput": 4011.37, "total_tokens": 6201088}
1997
+ {"current_steps": 9940, "total_steps": 9960, "loss": 0.0, "lr": 1.354168438255643e-10, "epoch": 19.95983935742972, "percentage": 99.8, "elapsed_time": "0:25:46", "remaining_time": "0:00:03", "throughput": 4011.54, "total_tokens": 6204672}
1998
+ {"current_steps": 9945, "total_steps": 9960, "loss": 0.0, "lr": 7.860947316140621e-11, "epoch": 19.96987951807229, "percentage": 99.85, "elapsed_time": "0:25:47", "remaining_time": "0:00:02", "throughput": 4011.62, "total_tokens": 6207360}
1999
+ {"current_steps": 9950, "total_steps": 9960, "loss": 0.0, "lr": 3.715531014025775e-11, "epoch": 19.97991967871486, "percentage": 99.9, "elapsed_time": "0:25:48", "remaining_time": "0:00:01", "throughput": 4011.78, "total_tokens": 6210368}
2000
+ {"current_steps": 9955, "total_steps": 9960, "loss": 0.0, "lr": 1.1054482056405136e-11, "epoch": 19.98995983935743, "percentage": 99.95, "elapsed_time": "0:25:48", "remaining_time": "0:00:00", "throughput": 4011.77, "total_tokens": 6212800}
2001
+ {"current_steps": 9960, "total_steps": 9960, "loss": 0.0001, "lr": 3.0706905573829603e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:49", "remaining_time": "0:00:00", "throughput": 4011.79, "total_tokens": 6215968}
2002
+ {"current_steps": 9960, "total_steps": 9960, "eval_loss": 1.102670431137085, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:57", "remaining_time": "0:00:00", "throughput": 3990.99, "total_tokens": 6215968}
2003
+ {"current_steps": 9960, "total_steps": 9960, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:58", "remaining_time": "0:00:00", "throughput": 3987.92, "total_tokens": 6215968}