Training in progress, step 9960
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +198 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:caeebda46c8c536201990871c0f48980b50cd46eeec65b79f40fa2ff5dd03a72
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -1803,3 +1803,201 @@
|
|
| 1803 |
{"current_steps": 8970, "total_steps": 9960, "loss": 0.0, "lr": 2.985474358588658e-07, "epoch": 18.012048192771083, "percentage": 90.06, "elapsed_time": "0:23:23", "remaining_time": "0:02:34", "throughput": 3987.5, "total_tokens": 5598368}
|
| 1804 |
{"current_steps": 8975, "total_steps": 9960, "loss": 0.0, "lr": 2.9557241234973446e-07, "epoch": 18.022088353413654, "percentage": 90.11, "elapsed_time": "0:23:24", "remaining_time": "0:02:34", "throughput": 3987.62, "total_tokens": 5601664}
|
| 1805 |
{"current_steps": 8980, "total_steps": 9960, "loss": 0.0, "lr": 2.926118346784379e-07, "epoch": 18.032128514056225, "percentage": 90.16, "elapsed_time": "0:23:25", "remaining_time": "0:02:33", "throughput": 3987.77, "total_tokens": 5604736}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1803 |
{"current_steps": 8970, "total_steps": 9960, "loss": 0.0, "lr": 2.985474358588658e-07, "epoch": 18.012048192771083, "percentage": 90.06, "elapsed_time": "0:23:23", "remaining_time": "0:02:34", "throughput": 3987.5, "total_tokens": 5598368}
|
| 1804 |
{"current_steps": 8975, "total_steps": 9960, "loss": 0.0, "lr": 2.9557241234973446e-07, "epoch": 18.022088353413654, "percentage": 90.11, "elapsed_time": "0:23:24", "remaining_time": "0:02:34", "throughput": 3987.62, "total_tokens": 5601664}
|
| 1805 |
{"current_steps": 8980, "total_steps": 9960, "loss": 0.0, "lr": 2.926118346784379e-07, "epoch": 18.032128514056225, "percentage": 90.16, "elapsed_time": "0:23:25", "remaining_time": "0:02:33", "throughput": 3987.77, "total_tokens": 5604736}
|
| 1806 |
+
{"current_steps": 8985, "total_steps": 9960, "loss": 0.0, "lr": 2.8966571193599304e-07, "epoch": 18.042168674698797, "percentage": 90.21, "elapsed_time": "0:23:26", "remaining_time": "0:02:32", "throughput": 3987.93, "total_tokens": 5607936}
|
| 1807 |
+
{"current_steps": 8990, "total_steps": 9960, "loss": 0.0157, "lr": 2.8673405316902824e-07, "epoch": 18.052208835341364, "percentage": 90.26, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.03, "total_tokens": 5611200}
|
| 1808 |
+
{"current_steps": 8995, "total_steps": 9960, "loss": 0.0, "lr": 2.8381686737975867e-07, "epoch": 18.062248995983936, "percentage": 90.31, "elapsed_time": "0:23:27", "remaining_time": "0:02:31", "throughput": 3988.13, "total_tokens": 5613856}
|
| 1809 |
+
{"current_steps": 9000, "total_steps": 9960, "loss": 0.0002, "lr": 2.809141635259555e-07, "epoch": 18.072289156626507, "percentage": 90.36, "elapsed_time": "0:23:28", "remaining_time": "0:02:30", "throughput": 3988.26, "total_tokens": 5617152}
|
| 1810 |
+
{"current_steps": 9005, "total_steps": 9960, "loss": 0.0, "lr": 2.780259505209249e-07, "epoch": 18.082329317269075, "percentage": 90.41, "elapsed_time": "0:23:29", "remaining_time": "0:02:29", "throughput": 3988.37, "total_tokens": 5620160}
|
| 1811 |
+
{"current_steps": 9010, "total_steps": 9960, "loss": 0.0, "lr": 2.7515223723346974e-07, "epoch": 18.092369477911646, "percentage": 90.46, "elapsed_time": "0:23:29", "remaining_time": "0:02:28", "throughput": 3988.55, "total_tokens": 5623424}
|
| 1812 |
+
{"current_steps": 9015, "total_steps": 9960, "loss": 0.0, "lr": 2.722930324878748e-07, "epoch": 18.102409638554217, "percentage": 90.51, "elapsed_time": "0:23:30", "remaining_time": "0:02:27", "throughput": 3988.54, "total_tokens": 5626208}
|
| 1813 |
+
{"current_steps": 9020, "total_steps": 9960, "loss": 0.0, "lr": 2.694483450638685e-07, "epoch": 18.11244979919679, "percentage": 90.56, "elapsed_time": "0:23:31", "remaining_time": "0:02:27", "throughput": 3988.7, "total_tokens": 5629280}
|
| 1814 |
+
{"current_steps": 9025, "total_steps": 9960, "loss": 0.0, "lr": 2.666181836966053e-07, "epoch": 18.122489959839356, "percentage": 90.61, "elapsed_time": "0:23:32", "remaining_time": "0:02:26", "throughput": 3988.75, "total_tokens": 5632256}
|
| 1815 |
+
{"current_steps": 9030, "total_steps": 9960, "loss": 0.0002, "lr": 2.6380255707663285e-07, "epoch": 18.132530120481928, "percentage": 90.66, "elapsed_time": "0:23:32", "remaining_time": "0:02:25", "throughput": 3988.76, "total_tokens": 5634688}
|
| 1816 |
+
{"current_steps": 9035, "total_steps": 9960, "loss": 0.0, "lr": 2.610014738498656e-07, "epoch": 18.1425702811245, "percentage": 90.71, "elapsed_time": "0:23:33", "remaining_time": "0:02:24", "throughput": 3988.88, "total_tokens": 5637984}
|
| 1817 |
+
{"current_steps": 9040, "total_steps": 9960, "loss": 0.0, "lr": 2.5821494261756284e-07, "epoch": 18.152610441767067, "percentage": 90.76, "elapsed_time": "0:23:34", "remaining_time": "0:02:23", "throughput": 3989.05, "total_tokens": 5641440}
|
| 1818 |
+
{"current_steps": 9045, "total_steps": 9960, "loss": 0.0529, "lr": 2.554429719362972e-07, "epoch": 18.162650602409638, "percentage": 90.81, "elapsed_time": "0:23:35", "remaining_time": "0:02:23", "throughput": 3989.28, "total_tokens": 5644960}
|
| 1819 |
+
{"current_steps": 9050, "total_steps": 9960, "loss": 0.0, "lr": 2.526855703179304e-07, "epoch": 18.17269076305221, "percentage": 90.86, "elapsed_time": "0:23:35", "remaining_time": "0:02:22", "throughput": 3989.5, "total_tokens": 5648512}
|
| 1820 |
+
{"current_steps": 9055, "total_steps": 9960, "loss": 0.0, "lr": 2.4994274622958726e-07, "epoch": 18.18273092369478, "percentage": 90.91, "elapsed_time": "0:23:36", "remaining_time": "0:02:21", "throughput": 3989.75, "total_tokens": 5651584}
|
| 1821 |
+
{"current_steps": 9060, "total_steps": 9960, "loss": 0.0, "lr": 2.4721450809363054e-07, "epoch": 18.19277108433735, "percentage": 90.96, "elapsed_time": "0:23:37", "remaining_time": "0:02:20", "throughput": 3989.86, "total_tokens": 5654720}
|
| 1822 |
+
{"current_steps": 9065, "total_steps": 9960, "loss": 0.0, "lr": 2.4450086428763345e-07, "epoch": 18.20281124497992, "percentage": 91.01, "elapsed_time": "0:23:38", "remaining_time": "0:02:20", "throughput": 3989.96, "total_tokens": 5657952}
|
| 1823 |
+
{"current_steps": 9070, "total_steps": 9960, "loss": 0.0, "lr": 2.4180182314435305e-07, "epoch": 18.21285140562249, "percentage": 91.06, "elapsed_time": "0:23:38", "remaining_time": "0:02:19", "throughput": 3990.13, "total_tokens": 5661120}
|
| 1824 |
+
{"current_steps": 9075, "total_steps": 9960, "loss": 0.0, "lr": 2.3911739295170875e-07, "epoch": 18.22289156626506, "percentage": 91.11, "elapsed_time": "0:23:39", "remaining_time": "0:02:18", "throughput": 3990.37, "total_tokens": 5664704}
|
| 1825 |
+
{"current_steps": 9080, "total_steps": 9960, "loss": 0.0, "lr": 2.364475819527523e-07, "epoch": 18.23293172690763, "percentage": 91.16, "elapsed_time": "0:23:40", "remaining_time": "0:02:17", "throughput": 3990.52, "total_tokens": 5667744}
|
| 1826 |
+
{"current_steps": 9085, "total_steps": 9960, "loss": 0.0, "lr": 2.3379239834564526e-07, "epoch": 18.2429718875502, "percentage": 91.21, "elapsed_time": "0:23:40", "remaining_time": "0:02:16", "throughput": 3990.61, "total_tokens": 5670496}
|
| 1827 |
+
{"current_steps": 9090, "total_steps": 9960, "loss": 0.0, "lr": 2.3115185028363186e-07, "epoch": 18.253012048192772, "percentage": 91.27, "elapsed_time": "0:23:41", "remaining_time": "0:02:16", "throughput": 3990.78, "total_tokens": 5673632}
|
| 1828 |
+
{"current_steps": 9095, "total_steps": 9960, "loss": 0.0, "lr": 2.2852594587501887e-07, "epoch": 18.26305220883534, "percentage": 91.32, "elapsed_time": "0:23:42", "remaining_time": "0:02:15", "throughput": 3990.88, "total_tokens": 5676672}
|
| 1829 |
+
{"current_steps": 9100, "total_steps": 9960, "loss": 0.0, "lr": 2.259146931831413e-07, "epoch": 18.27309236947791, "percentage": 91.37, "elapsed_time": "0:23:43", "remaining_time": "0:02:14", "throughput": 3991.1, "total_tokens": 5680352}
|
| 1830 |
+
{"current_steps": 9105, "total_steps": 9960, "loss": 0.0, "lr": 2.2331810022634847e-07, "epoch": 18.283132530120483, "percentage": 91.42, "elapsed_time": "0:23:43", "remaining_time": "0:02:13", "throughput": 3991.13, "total_tokens": 5683104}
|
| 1831 |
+
{"current_steps": 9110, "total_steps": 9960, "loss": 0.0001, "lr": 2.2073617497797018e-07, "epoch": 18.29317269076305, "percentage": 91.47, "elapsed_time": "0:23:44", "remaining_time": "0:02:12", "throughput": 3991.27, "total_tokens": 5686688}
|
| 1832 |
+
{"current_steps": 9115, "total_steps": 9960, "loss": 0.0, "lr": 2.1816892536629775e-07, "epoch": 18.303212851405622, "percentage": 91.52, "elapsed_time": "0:23:45", "remaining_time": "0:02:12", "throughput": 3991.36, "total_tokens": 5689600}
|
| 1833 |
+
{"current_steps": 9120, "total_steps": 9960, "loss": 0.0, "lr": 2.1561635927456083e-07, "epoch": 18.313253012048193, "percentage": 91.57, "elapsed_time": "0:23:46", "remaining_time": "0:02:11", "throughput": 3991.53, "total_tokens": 5692768}
|
| 1834 |
+
{"current_steps": 9125, "total_steps": 9960, "loss": 0.0, "lr": 2.1307848454089452e-07, "epoch": 18.323293172690764, "percentage": 91.62, "elapsed_time": "0:23:46", "remaining_time": "0:02:10", "throughput": 3991.63, "total_tokens": 5695584}
|
| 1835 |
+
{"current_steps": 9130, "total_steps": 9960, "loss": 0.0, "lr": 2.1055530895832897e-07, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:23:47", "remaining_time": "0:02:09", "throughput": 3991.76, "total_tokens": 5698784}
|
| 1836 |
+
{"current_steps": 9135, "total_steps": 9960, "loss": 0.0, "lr": 2.0804684027474987e-07, "epoch": 18.343373493975903, "percentage": 91.72, "elapsed_time": "0:23:48", "remaining_time": "0:02:08", "throughput": 3991.81, "total_tokens": 5701504}
|
| 1837 |
+
{"current_steps": 9140, "total_steps": 9960, "loss": 0.0, "lr": 2.055530861928884e-07, "epoch": 18.353413654618475, "percentage": 91.77, "elapsed_time": "0:23:49", "remaining_time": "0:02:08", "throughput": 3992.04, "total_tokens": 5705216}
|
| 1838 |
+
{"current_steps": 9145, "total_steps": 9960, "loss": 0.0001, "lr": 2.0307405437029027e-07, "epoch": 18.363453815261042, "percentage": 91.82, "elapsed_time": "0:23:49", "remaining_time": "0:02:07", "throughput": 3992.25, "total_tokens": 5708576}
|
| 1839 |
+
{"current_steps": 9150, "total_steps": 9960, "loss": 0.0, "lr": 2.006097524192918e-07, "epoch": 18.373493975903614, "percentage": 91.87, "elapsed_time": "0:23:50", "remaining_time": "0:02:06", "throughput": 3992.56, "total_tokens": 5712288}
|
| 1840 |
+
{"current_steps": 9155, "total_steps": 9960, "loss": 0.0, "lr": 1.9816018790700165e-07, "epoch": 18.383534136546185, "percentage": 91.92, "elapsed_time": "0:23:51", "remaining_time": "0:02:05", "throughput": 3992.74, "total_tokens": 5715648}
|
| 1841 |
+
{"current_steps": 9160, "total_steps": 9960, "loss": 0.0, "lr": 1.9572536835527013e-07, "epoch": 18.393574297188756, "percentage": 91.97, "elapsed_time": "0:23:52", "remaining_time": "0:02:05", "throughput": 3992.88, "total_tokens": 5718720}
|
| 1842 |
+
{"current_steps": 9165, "total_steps": 9960, "loss": 0.0, "lr": 1.933053012406749e-07, "epoch": 18.403614457831324, "percentage": 92.02, "elapsed_time": "0:23:53", "remaining_time": "0:02:04", "throughput": 3993.13, "total_tokens": 5722560}
|
| 1843 |
+
{"current_steps": 9170, "total_steps": 9960, "loss": 0.0, "lr": 1.908999939944911e-07, "epoch": 18.413654618473895, "percentage": 92.07, "elapsed_time": "0:23:53", "remaining_time": "0:02:03", "throughput": 3993.18, "total_tokens": 5725408}
|
| 1844 |
+
{"current_steps": 9175, "total_steps": 9960, "loss": 0.0001, "lr": 1.8850945400266994e-07, "epoch": 18.423694779116467, "percentage": 92.12, "elapsed_time": "0:23:54", "remaining_time": "0:02:02", "throughput": 3993.36, "total_tokens": 5729024}
|
| 1845 |
+
{"current_steps": 9180, "total_steps": 9960, "loss": 0.0, "lr": 1.861336886058196e-07, "epoch": 18.433734939759034, "percentage": 92.17, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.46, "total_tokens": 5731584}
|
| 1846 |
+
{"current_steps": 9185, "total_steps": 9960, "loss": 0.0, "lr": 1.8377270509917777e-07, "epoch": 18.443775100401606, "percentage": 92.22, "elapsed_time": "0:23:55", "remaining_time": "0:02:01", "throughput": 3993.58, "total_tokens": 5734624}
|
| 1847 |
+
{"current_steps": 9190, "total_steps": 9960, "loss": 0.0001, "lr": 1.81426510732593e-07, "epoch": 18.453815261044177, "percentage": 92.27, "elapsed_time": "0:23:56", "remaining_time": "0:02:00", "throughput": 3993.73, "total_tokens": 5737920}
|
| 1848 |
+
{"current_steps": 9195, "total_steps": 9960, "loss": 0.0, "lr": 1.7909511271050006e-07, "epoch": 18.46385542168675, "percentage": 92.32, "elapsed_time": "0:23:57", "remaining_time": "0:01:59", "throughput": 3993.81, "total_tokens": 5740896}
|
| 1849 |
+
{"current_steps": 9200, "total_steps": 9960, "loss": 0.0, "lr": 1.7677851819189907e-07, "epoch": 18.473895582329316, "percentage": 92.37, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.94, "total_tokens": 5744000}
|
| 1850 |
+
{"current_steps": 9205, "total_steps": 9960, "loss": 0.0, "lr": 1.7447673429033361e-07, "epoch": 18.483935742971887, "percentage": 92.42, "elapsed_time": "0:23:58", "remaining_time": "0:01:58", "throughput": 3993.92, "total_tokens": 5746816}
|
| 1851 |
+
{"current_steps": 9210, "total_steps": 9960, "loss": 0.0, "lr": 1.7218976807386767e-07, "epoch": 18.49397590361446, "percentage": 92.47, "elapsed_time": "0:23:59", "remaining_time": "0:01:57", "throughput": 3993.97, "total_tokens": 5749696}
|
| 1852 |
+
{"current_steps": 9215, "total_steps": 9960, "loss": 0.0, "lr": 1.6991762656506483e-07, "epoch": 18.50401606425703, "percentage": 92.52, "elapsed_time": "0:24:00", "remaining_time": "0:01:56", "throughput": 3994.08, "total_tokens": 5752544}
|
| 1853 |
+
{"current_steps": 9220, "total_steps": 9960, "loss": 0.0, "lr": 1.6766031674096795e-07, "epoch": 18.514056224899598, "percentage": 92.57, "elapsed_time": "0:24:01", "remaining_time": "0:01:55", "throughput": 3994.38, "total_tokens": 5756672}
|
| 1854 |
+
{"current_steps": 9225, "total_steps": 9960, "loss": 0.0, "lr": 1.654178455330735e-07, "epoch": 18.52409638554217, "percentage": 92.62, "elapsed_time": "0:24:01", "remaining_time": "0:01:54", "throughput": 3994.52, "total_tokens": 5759520}
|
| 1855 |
+
{"current_steps": 9230, "total_steps": 9960, "loss": 0.0, "lr": 1.631902198273172e-07, "epoch": 18.53413654618474, "percentage": 92.67, "elapsed_time": "0:24:02", "remaining_time": "0:01:54", "throughput": 3994.6, "total_tokens": 5762848}
|
| 1856 |
+
{"current_steps": 9235, "total_steps": 9960, "loss": 0.0, "lr": 1.6097744646404457e-07, "epoch": 18.544176706827308, "percentage": 92.72, "elapsed_time": "0:24:03", "remaining_time": "0:01:53", "throughput": 3994.82, "total_tokens": 5766496}
|
| 1857 |
+
{"current_steps": 9240, "total_steps": 9960, "loss": 0.0, "lr": 1.5877953223799703e-07, "epoch": 18.55421686746988, "percentage": 92.77, "elapsed_time": "0:24:04", "remaining_time": "0:01:52", "throughput": 3995.0, "total_tokens": 5769600}
|
| 1858 |
+
{"current_steps": 9245, "total_steps": 9960, "loss": 0.0, "lr": 1.565964838982881e-07, "epoch": 18.56425702811245, "percentage": 92.82, "elapsed_time": "0:24:04", "remaining_time": "0:01:51", "throughput": 3995.14, "total_tokens": 5772800}
|
| 1859 |
+
{"current_steps": 9250, "total_steps": 9960, "loss": 0.0, "lr": 1.544283081483805e-07, "epoch": 18.57429718875502, "percentage": 92.87, "elapsed_time": "0:24:05", "remaining_time": "0:01:50", "throughput": 3995.31, "total_tokens": 5776416}
|
| 1860 |
+
{"current_steps": 9255, "total_steps": 9960, "loss": 0.0, "lr": 1.5227501164607138e-07, "epoch": 18.58433734939759, "percentage": 92.92, "elapsed_time": "0:24:06", "remaining_time": "0:01:50", "throughput": 3995.28, "total_tokens": 5778976}
|
| 1861 |
+
{"current_steps": 9260, "total_steps": 9960, "loss": 0.0, "lr": 1.501366010034644e-07, "epoch": 18.59437751004016, "percentage": 92.97, "elapsed_time": "0:24:07", "remaining_time": "0:01:49", "throughput": 3995.46, "total_tokens": 5782400}
|
| 1862 |
+
{"current_steps": 9265, "total_steps": 9960, "loss": 0.0, "lr": 1.4801308278695636e-07, "epoch": 18.604417670682732, "percentage": 93.02, "elapsed_time": "0:24:07", "remaining_time": "0:01:48", "throughput": 3995.45, "total_tokens": 5784640}
|
| 1863 |
+
{"current_steps": 9270, "total_steps": 9960, "loss": 0.0, "lr": 1.45904463517213e-07, "epoch": 18.6144578313253, "percentage": 93.07, "elapsed_time": "0:24:08", "remaining_time": "0:01:47", "throughput": 3995.67, "total_tokens": 5787936}
|
| 1864 |
+
{"current_steps": 9275, "total_steps": 9960, "loss": 0.0, "lr": 1.4381074966914987e-07, "epoch": 18.62449799196787, "percentage": 93.12, "elapsed_time": "0:24:09", "remaining_time": "0:01:47", "throughput": 3995.85, "total_tokens": 5791584}
|
| 1865 |
+
{"current_steps": 9280, "total_steps": 9960, "loss": 0.0, "lr": 1.4173194767191257e-07, "epoch": 18.634538152610443, "percentage": 93.17, "elapsed_time": "0:24:10", "remaining_time": "0:01:46", "throughput": 3995.96, "total_tokens": 5794912}
|
| 1866 |
+
{"current_steps": 9285, "total_steps": 9960, "loss": 0.0, "lr": 1.396680639088571e-07, "epoch": 18.644578313253014, "percentage": 93.22, "elapsed_time": "0:24:10", "remaining_time": "0:01:45", "throughput": 3996.02, "total_tokens": 5797568}
|
| 1867 |
+
{"current_steps": 9290, "total_steps": 9960, "loss": 0.0, "lr": 1.3761910471753126e-07, "epoch": 18.65461847389558, "percentage": 93.27, "elapsed_time": "0:24:11", "remaining_time": "0:01:44", "throughput": 3996.19, "total_tokens": 5801088}
|
| 1868 |
+
{"current_steps": 9295, "total_steps": 9960, "loss": 0.0, "lr": 1.3558507638965158e-07, "epoch": 18.664658634538153, "percentage": 93.32, "elapsed_time": "0:24:12", "remaining_time": "0:01:43", "throughput": 3996.27, "total_tokens": 5804096}
|
| 1869 |
+
{"current_steps": 9300, "total_steps": 9960, "loss": 0.0001, "lr": 1.3356598517108966e-07, "epoch": 18.674698795180724, "percentage": 93.37, "elapsed_time": "0:24:13", "remaining_time": "0:01:43", "throughput": 3996.45, "total_tokens": 5807392}
|
| 1870 |
+
{"current_steps": 9305, "total_steps": 9960, "loss": 0.0, "lr": 1.3156183726184657e-07, "epoch": 18.684738955823292, "percentage": 93.42, "elapsed_time": "0:24:13", "remaining_time": "0:01:42", "throughput": 3996.63, "total_tokens": 5810848}
|
| 1871 |
+
{"current_steps": 9310, "total_steps": 9960, "loss": 0.0, "lr": 1.295726388160412e-07, "epoch": 18.694779116465863, "percentage": 93.47, "elapsed_time": "0:24:14", "remaining_time": "0:01:41", "throughput": 3996.79, "total_tokens": 5814176}
|
| 1872 |
+
{"current_steps": 9315, "total_steps": 9960, "loss": 0.0, "lr": 1.2759839594188307e-07, "epoch": 18.704819277108435, "percentage": 93.52, "elapsed_time": "0:24:15", "remaining_time": "0:01:40", "throughput": 3996.91, "total_tokens": 5816736}
|
| 1873 |
+
{"current_steps": 9320, "total_steps": 9960, "loss": 0.0, "lr": 1.2563911470166057e-07, "epoch": 18.714859437751002, "percentage": 93.57, "elapsed_time": "0:24:15", "remaining_time": "0:01:39", "throughput": 3996.89, "total_tokens": 5819360}
|
| 1874 |
+
{"current_steps": 9325, "total_steps": 9960, "loss": 0.0, "lr": 1.2369480111171784e-07, "epoch": 18.724899598393574, "percentage": 93.62, "elapsed_time": "0:24:16", "remaining_time": "0:01:39", "throughput": 3997.0, "total_tokens": 5822304}
|
| 1875 |
+
{"current_steps": 9330, "total_steps": 9960, "loss": 0.0, "lr": 1.2176546114243903e-07, "epoch": 18.734939759036145, "percentage": 93.67, "elapsed_time": "0:24:17", "remaining_time": "0:01:38", "throughput": 3997.1, "total_tokens": 5824768}
|
| 1876 |
+
{"current_steps": 9335, "total_steps": 9960, "loss": 0.0, "lr": 1.198511007182296e-07, "epoch": 18.744979919678716, "percentage": 93.72, "elapsed_time": "0:24:17", "remaining_time": "0:01:37", "throughput": 3997.15, "total_tokens": 5827488}
|
| 1877 |
+
{"current_steps": 9340, "total_steps": 9960, "loss": 0.0, "lr": 1.1795172571749503e-07, "epoch": 18.755020080321284, "percentage": 93.78, "elapsed_time": "0:24:18", "remaining_time": "0:01:36", "throughput": 3997.21, "total_tokens": 5830496}
|
| 1878 |
+
{"current_steps": 9345, "total_steps": 9960, "loss": 0.0, "lr": 1.160673419726288e-07, "epoch": 18.765060240963855, "percentage": 93.83, "elapsed_time": "0:24:19", "remaining_time": "0:01:36", "throughput": 3997.4, "total_tokens": 5833952}
|
| 1879 |
+
{"current_steps": 9350, "total_steps": 9960, "loss": 0.0, "lr": 1.1419795526998679e-07, "epoch": 18.775100401606426, "percentage": 93.88, "elapsed_time": "0:24:20", "remaining_time": "0:01:35", "throughput": 3997.59, "total_tokens": 5837280}
|
| 1880 |
+
{"current_steps": 9355, "total_steps": 9960, "loss": 0.0, "lr": 1.1234357134987717e-07, "epoch": 18.785140562248998, "percentage": 93.93, "elapsed_time": "0:24:20", "remaining_time": "0:01:34", "throughput": 3997.62, "total_tokens": 5839936}
|
| 1881 |
+
{"current_steps": 9360, "total_steps": 9960, "loss": 0.0002, "lr": 1.1050419590653726e-07, "epoch": 18.795180722891565, "percentage": 93.98, "elapsed_time": "0:24:21", "remaining_time": "0:01:33", "throughput": 3997.79, "total_tokens": 5843584}
|
| 1882 |
+
{"current_steps": 9365, "total_steps": 9960, "loss": 0.0, "lr": 1.0867983458811792e-07, "epoch": 18.805220883534137, "percentage": 94.03, "elapsed_time": "0:24:22", "remaining_time": "0:01:32", "throughput": 3997.88, "total_tokens": 5846624}
|
| 1883 |
+
{"current_steps": 9370, "total_steps": 9960, "loss": 0.0, "lr": 1.0687049299666796e-07, "epoch": 18.815261044176708, "percentage": 94.08, "elapsed_time": "0:24:23", "remaining_time": "0:01:32", "throughput": 3998.09, "total_tokens": 5850112}
|
| 1884 |
+
{"current_steps": 9375, "total_steps": 9960, "loss": 0.0, "lr": 1.050761766881131e-07, "epoch": 18.825301204819276, "percentage": 94.13, "elapsed_time": "0:24:24", "remaining_time": "0:01:31", "throughput": 3998.33, "total_tokens": 5853856}
|
| 1885 |
+
{"current_steps": 9380, "total_steps": 9960, "loss": 0.0, "lr": 1.0329689117224262e-07, "epoch": 18.835341365461847, "percentage": 94.18, "elapsed_time": "0:24:24", "remaining_time": "0:01:30", "throughput": 3998.53, "total_tokens": 5857024}
|
| 1886 |
+
{"current_steps": 9385, "total_steps": 9960, "loss": 0.0, "lr": 1.0153264191269052e-07, "epoch": 18.84538152610442, "percentage": 94.23, "elapsed_time": "0:24:25", "remaining_time": "0:01:29", "throughput": 3998.64, "total_tokens": 5860128}
|
| 1887 |
+
{"current_steps": 9390, "total_steps": 9960, "loss": 0.0, "lr": 9.978343432691884e-08, "epoch": 18.855421686746986, "percentage": 94.28, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.68, "total_tokens": 5862336}
|
| 1888 |
+
{"current_steps": 9395, "total_steps": 9960, "loss": 0.0, "lr": 9.804927378620155e-08, "epoch": 18.865461847389557, "percentage": 94.33, "elapsed_time": "0:24:26", "remaining_time": "0:01:28", "throughput": 3998.95, "total_tokens": 5865792}
|
| 1889 |
+
{"current_steps": 9400, "total_steps": 9960, "loss": 0.0, "lr": 9.633016561560793e-08, "epoch": 18.87550200803213, "percentage": 94.38, "elapsed_time": "0:24:27", "remaining_time": "0:01:27", "throughput": 3999.23, "total_tokens": 5869280}
|
| 1890 |
+
{"current_steps": 9405, "total_steps": 9960, "loss": 0.0, "lr": 9.462611509398534e-08, "epoch": 18.8855421686747, "percentage": 94.43, "elapsed_time": "0:24:28", "remaining_time": "0:01:26", "throughput": 3999.36, "total_tokens": 5872288}
|
| 1891 |
+
{"current_steps": 9410, "total_steps": 9960, "loss": 0.0001, "lr": 9.293712745394479e-08, "epoch": 18.895582329317268, "percentage": 94.48, "elapsed_time": "0:24:28", "remaining_time": "0:01:25", "throughput": 3999.33, "total_tokens": 5874688}
|
| 1892 |
+
{"current_steps": 9415, "total_steps": 9960, "loss": 0.0, "lr": 9.126320788184374e-08, "epoch": 18.90562248995984, "percentage": 94.53, "elapsed_time": "0:24:29", "remaining_time": "0:01:25", "throughput": 3999.47, "total_tokens": 5877824}
|
| 1893 |
+
{"current_steps": 9420, "total_steps": 9960, "loss": 0.0, "lr": 8.960436151776886e-08, "epoch": 18.91566265060241, "percentage": 94.58, "elapsed_time": "0:24:30", "remaining_time": "0:01:24", "throughput": 3999.61, "total_tokens": 5881056}
|
| 1894 |
+
{"current_steps": 9425, "total_steps": 9960, "loss": 0.0, "lr": 8.796059345552389e-08, "epoch": 18.92570281124498, "percentage": 94.63, "elapsed_time": "0:24:31", "remaining_time": "0:01:23", "throughput": 3999.81, "total_tokens": 5884320}
|
| 1895 |
+
{"current_steps": 9430, "total_steps": 9960, "loss": 0.0, "lr": 8.633190874261011e-08, "epoch": 18.93574297188755, "percentage": 94.68, "elapsed_time": "0:24:31", "remaining_time": "0:01:22", "throughput": 3999.93, "total_tokens": 5887648}
|
| 1896 |
+
{"current_steps": 9435, "total_steps": 9960, "loss": 0.0, "lr": 8.471831238021366e-08, "epoch": 18.94578313253012, "percentage": 94.73, "elapsed_time": "0:24:32", "remaining_time": "0:01:21", "throughput": 4000.09, "total_tokens": 5890976}
|
| 1897 |
+
{"current_steps": 9440, "total_steps": 9960, "loss": 0.0, "lr": 8.31198093231872e-08, "epoch": 18.955823293172692, "percentage": 94.78, "elapsed_time": "0:24:33", "remaining_time": "0:01:21", "throughput": 4000.12, "total_tokens": 5893344}
|
| 1898 |
+
{"current_steps": 9445, "total_steps": 9960, "loss": 0.0, "lr": 8.153640448003875e-08, "epoch": 18.96586345381526, "percentage": 94.83, "elapsed_time": "0:24:33", "remaining_time": "0:01:20", "throughput": 4000.16, "total_tokens": 5895808}
|
| 1899 |
+
{"current_steps": 9450, "total_steps": 9960, "loss": 0.0, "lr": 7.996810271291344e-08, "epoch": 18.97590361445783, "percentage": 94.88, "elapsed_time": "0:24:34", "remaining_time": "0:01:19", "throughput": 4000.23, "total_tokens": 5899200}
|
| 1900 |
+
{"current_steps": 9455, "total_steps": 9960, "loss": 0.0, "lr": 7.841490883757907e-08, "epoch": 18.985943775100402, "percentage": 94.93, "elapsed_time": "0:24:35", "remaining_time": "0:01:18", "throughput": 4000.37, "total_tokens": 5902336}
|
| 1901 |
+
{"current_steps": 9460, "total_steps": 9960, "loss": 0.0, "lr": 7.687682762341276e-08, "epoch": 18.99598393574297, "percentage": 94.98, "elapsed_time": "0:24:36", "remaining_time": "0:01:18", "throughput": 4000.51, "total_tokens": 5905248}
|
| 1902 |
+
{"current_steps": 9465, "total_steps": 9960, "loss": 0.0, "lr": 7.535386379338371e-08, "epoch": 19.00602409638554, "percentage": 95.03, "elapsed_time": "0:24:37", "remaining_time": "0:01:17", "throughput": 4000.41, "total_tokens": 5908704}
|
| 1903 |
+
{"current_steps": 9470, "total_steps": 9960, "loss": 0.0, "lr": 7.384602202404335e-08, "epoch": 19.016064257028113, "percentage": 95.08, "elapsed_time": "0:24:37", "remaining_time": "0:01:16", "throughput": 4000.65, "total_tokens": 5912832}
|
| 1904 |
+
{"current_steps": 9475, "total_steps": 9960, "loss": 0.0, "lr": 7.235330694550402e-08, "epoch": 19.026104417670684, "percentage": 95.13, "elapsed_time": "0:24:38", "remaining_time": "0:01:15", "throughput": 4000.94, "total_tokens": 5917056}
|
| 1905 |
+
{"current_steps": 9480, "total_steps": 9960, "loss": 0.0, "lr": 7.087572314143198e-08, "epoch": 19.03614457831325, "percentage": 95.18, "elapsed_time": "0:24:39", "remaining_time": "0:01:14", "throughput": 4001.1, "total_tokens": 5920192}
|
| 1906 |
+
{"current_steps": 9485, "total_steps": 9960, "loss": 0.0059, "lr": 6.94132751490284e-08, "epoch": 19.046184738955823, "percentage": 95.23, "elapsed_time": "0:24:40", "remaining_time": "0:01:14", "throughput": 4001.11, "total_tokens": 5922368}
|
| 1907 |
+
{"current_steps": 9490, "total_steps": 9960, "loss": 0.0, "lr": 6.796596745901717e-08, "epoch": 19.056224899598394, "percentage": 95.28, "elapsed_time": "0:24:40", "remaining_time": "0:01:13", "throughput": 4001.16, "total_tokens": 5925056}
|
| 1908 |
+
{"current_steps": 9495, "total_steps": 9960, "loss": 0.0, "lr": 6.653380451563219e-08, "epoch": 19.066265060240966, "percentage": 95.33, "elapsed_time": "0:24:41", "remaining_time": "0:01:12", "throughput": 4001.22, "total_tokens": 5928256}
|
| 1909 |
+
{"current_steps": 9500, "total_steps": 9960, "loss": 0.0, "lr": 6.511679071659949e-08, "epoch": 19.076305220883533, "percentage": 95.38, "elapsed_time": "0:24:42", "remaining_time": "0:01:11", "throughput": 4001.34, "total_tokens": 5931392}
|
| 1910 |
+
{"current_steps": 9505, "total_steps": 9960, "loss": 0.0, "lr": 6.371493041313126e-08, "epoch": 19.086345381526105, "percentage": 95.43, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.49, "total_tokens": 5934464}
|
| 1911 |
+
{"current_steps": 9510, "total_steps": 9960, "loss": 0.0, "lr": 6.232822790990467e-08, "epoch": 19.096385542168676, "percentage": 95.48, "elapsed_time": "0:24:43", "remaining_time": "0:01:10", "throughput": 4001.63, "total_tokens": 5937568}
|
| 1912 |
+
{"current_steps": 9515, "total_steps": 9960, "loss": 0.0, "lr": 6.095668746505245e-08, "epoch": 19.106425702811244, "percentage": 95.53, "elapsed_time": "0:24:44", "remaining_time": "0:01:09", "throughput": 4001.8, "total_tokens": 5940768}
|
| 1913 |
+
{"current_steps": 9520, "total_steps": 9960, "loss": 0.0001, "lr": 5.96003132901507e-08, "epoch": 19.116465863453815, "percentage": 95.58, "elapsed_time": "0:24:45", "remaining_time": "0:01:08", "throughput": 4001.91, "total_tokens": 5944032}
|
| 1914 |
+
{"current_steps": 9525, "total_steps": 9960, "loss": 0.0001, "lr": 5.825910955020386e-08, "epoch": 19.126506024096386, "percentage": 95.63, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.05, "total_tokens": 5947168}
|
| 1915 |
+
{"current_steps": 9530, "total_steps": 9960, "loss": 0.0, "lr": 5.693308036363143e-08, "epoch": 19.136546184738958, "percentage": 95.68, "elapsed_time": "0:24:46", "remaining_time": "0:01:07", "throughput": 4002.2, "total_tokens": 5950592}
|
| 1916 |
+
{"current_steps": 9535, "total_steps": 9960, "loss": 0.0, "lr": 5.562222980225907e-08, "epoch": 19.146586345381525, "percentage": 95.73, "elapsed_time": "0:24:47", "remaining_time": "0:01:06", "throughput": 4002.26, "total_tokens": 5952960}
|
| 1917 |
+
{"current_steps": 9540, "total_steps": 9960, "loss": 0.0, "lr": 5.432656189130137e-08, "epoch": 19.156626506024097, "percentage": 95.78, "elapsed_time": "0:24:48", "remaining_time": "0:01:05", "throughput": 4002.45, "total_tokens": 5956288}
|
| 1918 |
+
{"current_steps": 9545, "total_steps": 9960, "loss": 0.0, "lr": 5.3046080609352455e-08, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:24:48", "remaining_time": "0:01:04", "throughput": 4002.47, "total_tokens": 5958752}
|
| 1919 |
+
{"current_steps": 9550, "total_steps": 9960, "loss": 0.0, "lr": 5.178078988837432e-08, "epoch": 19.176706827309236, "percentage": 95.88, "elapsed_time": "0:24:49", "remaining_time": "0:01:03", "throughput": 4002.57, "total_tokens": 5962144}
|
| 1920 |
+
{"current_steps": 9555, "total_steps": 9960, "loss": 0.0, "lr": 5.053069361368068e-08, "epoch": 19.186746987951807, "percentage": 95.93, "elapsed_time": "0:24:50", "remaining_time": "0:01:03", "throughput": 4002.78, "total_tokens": 5965280}
|
| 1921 |
+
{"current_steps": 9560, "total_steps": 9960, "loss": 0.0, "lr": 4.9295795623930945e-08, "epoch": 19.196787148594378, "percentage": 95.98, "elapsed_time": "0:24:50", "remaining_time": "0:01:02", "throughput": 4002.86, "total_tokens": 5968192}
|
| 1922 |
+
{"current_steps": 9565, "total_steps": 9960, "loss": 0.0, "lr": 4.807609971111238e-08, "epoch": 19.20682730923695, "percentage": 96.03, "elapsed_time": "0:24:51", "remaining_time": "0:01:01", "throughput": 4002.97, "total_tokens": 5971264}
|
| 1923 |
+
{"current_steps": 9570, "total_steps": 9960, "loss": 0.0, "lr": 4.68716096205335e-08, "epoch": 19.216867469879517, "percentage": 96.08, "elapsed_time": "0:24:52", "remaining_time": "0:01:00", "throughput": 4002.96, "total_tokens": 5973344}
|
| 1924 |
+
{"current_steps": 9575, "total_steps": 9960, "loss": 0.0, "lr": 4.5682329050810715e-08, "epoch": 19.22690763052209, "percentage": 96.13, "elapsed_time": "0:24:53", "remaining_time": "0:01:00", "throughput": 4003.19, "total_tokens": 5977248}
|
| 1925 |
+
{"current_steps": 9580, "total_steps": 9960, "loss": 0.0, "lr": 4.450826165385336e-08, "epoch": 19.23694779116466, "percentage": 96.18, "elapsed_time": "0:24:53", "remaining_time": "0:00:59", "throughput": 4003.37, "total_tokens": 5980704}
|
| 1926 |
+
{"current_steps": 9585, "total_steps": 9960, "loss": 0.0, "lr": 4.33494110348609e-08, "epoch": 19.246987951807228, "percentage": 96.23, "elapsed_time": "0:24:54", "remaining_time": "0:00:58", "throughput": 4003.53, "total_tokens": 5983936}
|
| 1927 |
+
{"current_steps": 9590, "total_steps": 9960, "loss": 0.0, "lr": 4.2205780752301865e-08, "epoch": 19.2570281124498, "percentage": 96.29, "elapsed_time": "0:24:55", "remaining_time": "0:00:57", "throughput": 4003.7, "total_tokens": 5987424}
|
| 1928 |
+
{"current_steps": 9595, "total_steps": 9960, "loss": 0.0, "lr": 4.107737431791159e-08, "epoch": 19.26706827309237, "percentage": 96.34, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.68, "total_tokens": 5990112}
|
| 1929 |
+
{"current_steps": 9600, "total_steps": 9960, "loss": 0.0, "lr": 3.996419519667505e-08, "epoch": 19.27710843373494, "percentage": 96.39, "elapsed_time": "0:24:56", "remaining_time": "0:00:56", "throughput": 4003.81, "total_tokens": 5993472}
|
| 1930 |
+
{"current_steps": 9605, "total_steps": 9960, "loss": 0.0, "lr": 3.8866246806821273e-08, "epoch": 19.28714859437751, "percentage": 96.44, "elapsed_time": "0:24:57", "remaining_time": "0:00:55", "throughput": 4003.84, "total_tokens": 5996320}
|
| 1931 |
+
{"current_steps": 9610, "total_steps": 9960, "loss": 0.0, "lr": 3.7783532519808376e-08, "epoch": 19.29718875502008, "percentage": 96.49, "elapsed_time": "0:24:58", "remaining_time": "0:00:54", "throughput": 4003.92, "total_tokens": 5999360}
|
| 1932 |
+
{"current_steps": 9615, "total_steps": 9960, "loss": 0.0, "lr": 3.671605566031633e-08, "epoch": 19.30722891566265, "percentage": 96.54, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.93, "total_tokens": 6002016}
|
| 1933 |
+
{"current_steps": 9620, "total_steps": 9960, "loss": 0.0, "lr": 3.566381950623588e-08, "epoch": 19.31726907630522, "percentage": 96.59, "elapsed_time": "0:24:59", "remaining_time": "0:00:53", "throughput": 4003.95, "total_tokens": 6004448}
|
| 1934 |
+
{"current_steps": 9625, "total_steps": 9960, "loss": 0.0, "lr": 3.462682728865685e-08, "epoch": 19.32730923694779, "percentage": 96.64, "elapsed_time": "0:25:00", "remaining_time": "0:00:52", "throughput": 4004.09, "total_tokens": 6007392}
|
| 1935 |
+
{"current_steps": 9630, "total_steps": 9960, "loss": 0.0, "lr": 3.3605082191860985e-08, "epoch": 19.337349397590362, "percentage": 96.69, "elapsed_time": "0:25:00", "remaining_time": "0:00:51", "throughput": 4004.16, "total_tokens": 6010176}
|
| 1936 |
+
{"current_steps": 9635, "total_steps": 9960, "loss": 0.0, "lr": 3.259858735331134e-08, "epoch": 19.347389558232933, "percentage": 96.74, "elapsed_time": "0:25:01", "remaining_time": "0:00:50", "throughput": 4004.33, "total_tokens": 6013120}
|
| 1937 |
+
{"current_steps": 9640, "total_steps": 9960, "loss": 0.0, "lr": 3.1607345863640114e-08, "epoch": 19.3574297188755, "percentage": 96.79, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.35, "total_tokens": 6015296}
|
| 1938 |
+
{"current_steps": 9645, "total_steps": 9960, "loss": 0.0001, "lr": 3.063136076664364e-08, "epoch": 19.367469879518072, "percentage": 96.84, "elapsed_time": "0:25:02", "remaining_time": "0:00:49", "throughput": 4004.39, "total_tokens": 6018016}
|
| 1939 |
+
{"current_steps": 9650, "total_steps": 9960, "loss": 0.0474, "lr": 2.967063505926848e-08, "epoch": 19.377510040160644, "percentage": 96.89, "elapsed_time": "0:25:03", "remaining_time": "0:00:48", "throughput": 4004.49, "total_tokens": 6021408}
|
| 1940 |
+
{"current_steps": 9655, "total_steps": 9960, "loss": 0.0, "lr": 2.8725171691605934e-08, "epoch": 19.38755020080321, "percentage": 96.94, "elapsed_time": "0:25:04", "remaining_time": "0:00:47", "throughput": 4004.63, "total_tokens": 6024544}
|
| 1941 |
+
{"current_steps": 9660, "total_steps": 9960, "loss": 0.0, "lr": 2.7794973566880323e-08, "epoch": 19.397590361445783, "percentage": 96.99, "elapsed_time": "0:25:05", "remaining_time": "0:00:46", "throughput": 4004.83, "total_tokens": 6027872}
|
| 1942 |
+
{"current_steps": 9665, "total_steps": 9960, "loss": 0.0, "lr": 2.6880043541441804e-08, "epoch": 19.407630522088354, "percentage": 97.04, "elapsed_time": "0:25:05", "remaining_time": "0:00:45", "throughput": 4004.92, "total_tokens": 6030784}
|
| 1943 |
+
{"current_steps": 9670, "total_steps": 9960, "loss": 0.0, "lr": 2.5980384424756366e-08, "epoch": 19.417670682730925, "percentage": 97.09, "elapsed_time": "0:25:06", "remaining_time": "0:00:45", "throughput": 4005.08, "total_tokens": 6034208}
|
| 1944 |
+
{"current_steps": 9675, "total_steps": 9960, "loss": 0.0001, "lr": 2.5095998979398628e-08, "epoch": 19.427710843373493, "percentage": 97.14, "elapsed_time": "0:25:07", "remaining_time": "0:00:44", "throughput": 4005.25, "total_tokens": 6037632}
|
| 1945 |
+
{"current_steps": 9680, "total_steps": 9960, "loss": 0.0, "lr": 2.4226889921041273e-08, "epoch": 19.437751004016064, "percentage": 97.19, "elapsed_time": "0:25:08", "remaining_time": "0:00:43", "throughput": 4005.45, "total_tokens": 6040928}
|
| 1946 |
+
{"current_steps": 9685, "total_steps": 9960, "loss": 0.0, "lr": 2.3373059918448958e-08, "epoch": 19.447791164658636, "percentage": 97.24, "elapsed_time": "0:25:08", "remaining_time": "0:00:42", "throughput": 4005.52, "total_tokens": 6044096}
|
| 1947 |
+
{"current_steps": 9690, "total_steps": 9960, "loss": 0.0, "lr": 2.2534511593468866e-08, "epoch": 19.457831325301203, "percentage": 97.29, "elapsed_time": "0:25:09", "remaining_time": "0:00:42", "throughput": 4005.71, "total_tokens": 6047456}
|
| 1948 |
+
{"current_steps": 9695, "total_steps": 9960, "loss": 0.0, "lr": 2.171124752102238e-08, "epoch": 19.467871485943775, "percentage": 97.34, "elapsed_time": "0:25:10", "remaining_time": "0:00:41", "throughput": 4005.72, "total_tokens": 6049856}
|
| 1949 |
+
{"current_steps": 9700, "total_steps": 9960, "loss": 0.0, "lr": 2.0903270229098992e-08, "epoch": 19.477911646586346, "percentage": 97.39, "elapsed_time": "0:25:10", "remaining_time": "0:00:40", "throughput": 4005.83, "total_tokens": 6052704}
|
| 1950 |
+
{"current_steps": 9705, "total_steps": 9960, "loss": 0.0, "lr": 2.0110582198745177e-08, "epoch": 19.487951807228917, "percentage": 97.44, "elapsed_time": "0:25:11", "remaining_time": "0:00:39", "throughput": 4005.95, "total_tokens": 6055456}
|
| 1951 |
+
{"current_steps": 9710, "total_steps": 9960, "loss": 0.0, "lr": 1.9333185864061077e-08, "epoch": 19.497991967871485, "percentage": 97.49, "elapsed_time": "0:25:12", "remaining_time": "0:00:38", "throughput": 4006.01, "total_tokens": 6058304}
|
| 1952 |
+
{"current_steps": 9715, "total_steps": 9960, "loss": 0.0, "lr": 1.8571083612188845e-08, "epoch": 19.508032128514056, "percentage": 97.54, "elapsed_time": "0:25:13", "remaining_time": "0:00:38", "throughput": 4006.09, "total_tokens": 6061440}
|
| 1953 |
+
{"current_steps": 9720, "total_steps": 9960, "loss": 0.0, "lr": 1.7824277783308197e-08, "epoch": 19.518072289156628, "percentage": 97.59, "elapsed_time": "0:25:13", "remaining_time": "0:00:37", "throughput": 4006.27, "total_tokens": 6065024}
|
| 1954 |
+
{"current_steps": 9725, "total_steps": 9960, "loss": 0.0, "lr": 1.7092770670628644e-08, "epoch": 19.528112449799195, "percentage": 97.64, "elapsed_time": "0:25:14", "remaining_time": "0:00:36", "throughput": 4006.44, "total_tokens": 6068256}
|
| 1955 |
+
{"current_steps": 9730, "total_steps": 9960, "loss": 0.0, "lr": 1.637656452038172e-08, "epoch": 19.538152610441767, "percentage": 97.69, "elapsed_time": "0:25:15", "remaining_time": "0:00:35", "throughput": 4006.6, "total_tokens": 6071200}
|
| 1956 |
+
{"current_steps": 9735, "total_steps": 9960, "loss": 0.0, "lr": 1.5675661531813215e-08, "epoch": 19.548192771084338, "percentage": 97.74, "elapsed_time": "0:25:16", "remaining_time": "0:00:35", "throughput": 4006.7, "total_tokens": 6074656}
|
| 1957 |
+
{"current_steps": 9740, "total_steps": 9960, "loss": 0.0, "lr": 1.4990063857180383e-08, "epoch": 19.55823293172691, "percentage": 97.79, "elapsed_time": "0:25:16", "remaining_time": "0:00:34", "throughput": 4006.74, "total_tokens": 6077408}
|
| 1958 |
+
{"current_steps": 9745, "total_steps": 9960, "loss": 0.0, "lr": 1.431977360173975e-08, "epoch": 19.568273092369477, "percentage": 97.84, "elapsed_time": "0:25:17", "remaining_time": "0:00:33", "throughput": 4006.82, "total_tokens": 6080352}
|
| 1959 |
+
{"current_steps": 9750, "total_steps": 9960, "loss": 0.0, "lr": 1.3664792823745442e-08, "epoch": 19.57831325301205, "percentage": 97.89, "elapsed_time": "0:25:18", "remaining_time": "0:00:32", "throughput": 4006.86, "total_tokens": 6082848}
|
| 1960 |
+
{"current_steps": 9755, "total_steps": 9960, "loss": 0.0, "lr": 1.3025123534440299e-08, "epoch": 19.58835341365462, "percentage": 97.94, "elapsed_time": "0:25:18", "remaining_time": "0:00:31", "throughput": 4006.99, "total_tokens": 6085664}
|
| 1961 |
+
{"current_steps": 9760, "total_steps": 9960, "loss": 0.0, "lr": 1.240076769804921e-08, "epoch": 19.598393574297187, "percentage": 97.99, "elapsed_time": "0:25:19", "remaining_time": "0:00:31", "throughput": 4007.02, "total_tokens": 6088608}
|
| 1962 |
+
{"current_steps": 9765, "total_steps": 9960, "loss": 0.0, "lr": 1.1791727231776906e-08, "epoch": 19.60843373493976, "percentage": 98.04, "elapsed_time": "0:25:20", "remaining_time": "0:00:30", "throughput": 4007.05, "total_tokens": 6091296}
|
| 1963 |
+
{"current_steps": 9770, "total_steps": 9960, "loss": 0.0, "lr": 1.1198004005796847e-08, "epoch": 19.61847389558233, "percentage": 98.09, "elapsed_time": "0:25:20", "remaining_time": "0:00:29", "throughput": 4007.18, "total_tokens": 6094880}
|
| 1964 |
+
{"current_steps": 9775, "total_steps": 9960, "loss": 0.0, "lr": 1.0619599843249006e-08, "epoch": 19.6285140562249, "percentage": 98.14, "elapsed_time": "0:25:21", "remaining_time": "0:00:28", "throughput": 4007.33, "total_tokens": 6098208}
|
| 1965 |
+
{"current_steps": 9780, "total_steps": 9960, "loss": 0.0, "lr": 1.0056516520232651e-08, "epoch": 19.63855421686747, "percentage": 98.19, "elapsed_time": "0:25:22", "remaining_time": "0:00:28", "throughput": 4007.36, "total_tokens": 6101024}
|
| 1966 |
+
{"current_steps": 9785, "total_steps": 9960, "loss": 0.0, "lr": 9.508755765802457e-09, "epoch": 19.64859437751004, "percentage": 98.24, "elapsed_time": "0:25:23", "remaining_time": "0:00:27", "throughput": 4007.44, "total_tokens": 6103904}
|
| 1967 |
+
{"current_steps": 9790, "total_steps": 9960, "loss": 0.0, "lr": 8.976319261962407e-09, "epoch": 19.65863453815261, "percentage": 98.29, "elapsed_time": "0:25:23", "remaining_time": "0:00:26", "throughput": 4007.57, "total_tokens": 6106816}
|
| 1968 |
+
{"current_steps": 9795, "total_steps": 9960, "loss": 0.0, "lr": 8.459208643659122e-09, "epoch": 19.66867469879518, "percentage": 98.34, "elapsed_time": "0:25:24", "remaining_time": "0:00:25", "throughput": 4007.72, "total_tokens": 6110368}
|
| 1969 |
+
{"current_steps": 9800, "total_steps": 9960, "loss": 0.0, "lr": 7.957425498778537e-09, "epoch": 19.67871485943775, "percentage": 98.39, "elapsed_time": "0:25:25", "remaining_time": "0:00:24", "throughput": 4007.88, "total_tokens": 6113856}
|
| 1970 |
+
{"current_steps": 9805, "total_steps": 9960, "loss": 0.0, "lr": 7.470971368142011e-09, "epoch": 19.688755020080322, "percentage": 98.44, "elapsed_time": "0:25:26", "remaining_time": "0:00:24", "throughput": 4007.94, "total_tokens": 6116448}
|
| 1971 |
+
{"current_steps": 9810, "total_steps": 9960, "loss": 0.0, "lr": 6.999847745498556e-09, "epoch": 19.698795180722893, "percentage": 98.49, "elapsed_time": "0:25:26", "remaining_time": "0:00:23", "throughput": 4008.19, "total_tokens": 6120096}
|
| 1972 |
+
{"current_steps": 9815, "total_steps": 9960, "loss": 0.0, "lr": 6.544056077523175e-09, "epoch": 19.70883534136546, "percentage": 98.54, "elapsed_time": "0:25:27", "remaining_time": "0:00:22", "throughput": 4008.25, "total_tokens": 6123008}
|
| 1973 |
+
{"current_steps": 9820, "total_steps": 9960, "loss": 0.0, "lr": 6.1035977638101985e-09, "epoch": 19.718875502008032, "percentage": 98.59, "elapsed_time": "0:25:28", "remaining_time": "0:00:21", "throughput": 4008.44, "total_tokens": 6126720}
|
| 1974 |
+
{"current_steps": 9825, "total_steps": 9960, "loss": 0.0, "lr": 5.678474156871061e-09, "epoch": 19.728915662650603, "percentage": 98.64, "elapsed_time": "0:25:29", "remaining_time": "0:00:21", "throughput": 4008.51, "total_tokens": 6129760}
|
| 1975 |
+
{"current_steps": 9830, "total_steps": 9960, "loss": 0.0, "lr": 5.268686562127645e-09, "epoch": 19.73895582329317, "percentage": 98.69, "elapsed_time": "0:25:30", "remaining_time": "0:00:20", "throughput": 4008.67, "total_tokens": 6133344}
|
| 1976 |
+
{"current_steps": 9835, "total_steps": 9960, "loss": 0.0, "lr": 4.874236237911723e-09, "epoch": 19.748995983935743, "percentage": 98.74, "elapsed_time": "0:25:30", "remaining_time": "0:00:19", "throughput": 4008.84, "total_tokens": 6136576}
|
| 1977 |
+
{"current_steps": 9840, "total_steps": 9960, "loss": 0.0, "lr": 4.495124395456629e-09, "epoch": 19.759036144578314, "percentage": 98.8, "elapsed_time": "0:25:31", "remaining_time": "0:00:18", "throughput": 4008.86, "total_tokens": 6139136}
|
| 1978 |
+
{"current_steps": 9845, "total_steps": 9960, "loss": 0.0, "lr": 4.1313521988983754e-09, "epoch": 19.769076305220885, "percentage": 98.85, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4008.98, "total_tokens": 6142240}
|
| 1979 |
+
{"current_steps": 9850, "total_steps": 9960, "loss": 0.0, "lr": 3.7829207652673175e-09, "epoch": 19.779116465863453, "percentage": 98.9, "elapsed_time": "0:25:32", "remaining_time": "0:00:17", "throughput": 4009.04, "total_tokens": 6145088}
|
| 1980 |
+
{"current_steps": 9855, "total_steps": 9960, "loss": 0.0, "lr": 3.44983116448927e-09, "epoch": 19.789156626506024, "percentage": 98.95, "elapsed_time": "0:25:33", "remaining_time": "0:00:16", "throughput": 4009.11, "total_tokens": 6147968}
|
| 1981 |
+
{"current_steps": 9860, "total_steps": 9960, "loss": 0.0, "lr": 3.1320844193788445e-09, "epoch": 19.799196787148595, "percentage": 99.0, "elapsed_time": "0:25:34", "remaining_time": "0:00:15", "throughput": 4009.28, "total_tokens": 6151296}
|
| 1982 |
+
{"current_steps": 9865, "total_steps": 9960, "loss": 0.0, "lr": 2.8296815056377824e-09, "epoch": 19.809236947791163, "percentage": 99.05, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.54, "total_tokens": 6154880}
|
| 1983 |
+
{"current_steps": 9870, "total_steps": 9960, "loss": 0.0, "lr": 2.54262335185107e-09, "epoch": 19.819277108433734, "percentage": 99.1, "elapsed_time": "0:25:35", "remaining_time": "0:00:14", "throughput": 4009.68, "total_tokens": 6158528}
|
| 1984 |
+
{"current_steps": 9875, "total_steps": 9960, "loss": 0.0, "lr": 2.2709108394863845e-09, "epoch": 19.829317269076306, "percentage": 99.15, "elapsed_time": "0:25:36", "remaining_time": "0:00:13", "throughput": 4009.81, "total_tokens": 6161600}
|
| 1985 |
+
{"current_steps": 9880, "total_steps": 9960, "loss": 0.0002, "lr": 2.0145448028874305e-09, "epoch": 19.839357429718877, "percentage": 99.2, "elapsed_time": "0:25:37", "remaining_time": "0:00:12", "throughput": 4009.85, "total_tokens": 6164288}
|
| 1986 |
+
{"current_steps": 9885, "total_steps": 9960, "loss": 0.0, "lr": 1.7735260292750522e-09, "epoch": 19.849397590361445, "percentage": 99.25, "elapsed_time": "0:25:38", "remaining_time": "0:00:11", "throughput": 4010.07, "total_tokens": 6167904}
|
| 1987 |
+
{"current_steps": 9890, "total_steps": 9960, "loss": 0.0, "lr": 1.547855258743347e-09, "epoch": 19.859437751004016, "percentage": 99.3, "elapsed_time": "0:25:38", "remaining_time": "0:00:10", "throughput": 4010.23, "total_tokens": 6171360}
|
| 1988 |
+
{"current_steps": 9895, "total_steps": 9960, "loss": 0.0, "lr": 1.3375331842574446e-09, "epoch": 19.869477911646587, "percentage": 99.35, "elapsed_time": "0:25:39", "remaining_time": "0:00:10", "throughput": 4010.4, "total_tokens": 6174848}
|
| 1989 |
+
{"current_steps": 9900, "total_steps": 9960, "loss": 0.0, "lr": 1.1425604516512868e-09, "epoch": 19.879518072289155, "percentage": 99.4, "elapsed_time": "0:25:40", "remaining_time": "0:00:09", "throughput": 4010.34, "total_tokens": 6177472}
|
| 1990 |
+
{"current_steps": 9905, "total_steps": 9960, "loss": 0.0, "lr": 9.629376596248518e-10, "epoch": 19.889558232931726, "percentage": 99.45, "elapsed_time": "0:25:41", "remaining_time": "0:00:08", "throughput": 4010.51, "total_tokens": 6180576}
|
| 1991 |
+
{"current_steps": 9910, "total_steps": 9960, "loss": 0.0, "lr": 7.986653597447102e-10, "epoch": 19.899598393574298, "percentage": 99.5, "elapsed_time": "0:25:41", "remaining_time": "0:00:07", "throughput": 4010.6, "total_tokens": 6183520}
|
| 1992 |
+
{"current_steps": 9915, "total_steps": 9960, "loss": 0.0, "lr": 6.497440564395829e-10, "epoch": 19.90963855421687, "percentage": 99.55, "elapsed_time": "0:25:42", "remaining_time": "0:00:07", "throughput": 4010.98, "total_tokens": 6188000}
|
| 1993 |
+
{"current_steps": 9920, "total_steps": 9960, "loss": 0.0, "lr": 5.161742070014519e-10, "epoch": 19.919678714859437, "percentage": 99.6, "elapsed_time": "0:25:43", "remaining_time": "0:00:06", "throughput": 4011.0, "total_tokens": 6190560}
|
| 1994 |
+
{"current_steps": 9925, "total_steps": 9960, "loss": 0.0, "lr": 3.9795622158111945e-10, "epoch": 19.929718875502008, "percentage": 99.65, "elapsed_time": "0:25:44", "remaining_time": "0:00:05", "throughput": 4011.2, "total_tokens": 6194080}
|
| 1995 |
+
{"current_steps": 9930, "total_steps": 9960, "loss": 0.0, "lr": 2.950904631893181e-10, "epoch": 19.93975903614458, "percentage": 99.7, "elapsed_time": "0:25:45", "remaining_time": "0:00:04", "throughput": 4011.29, "total_tokens": 6197728}
|
| 1996 |
+
{"current_steps": 9935, "total_steps": 9960, "loss": 0.0, "lr": 2.0757724769560062e-10, "epoch": 19.949799196787147, "percentage": 99.75, "elapsed_time": "0:25:45", "remaining_time": "0:00:03", "throughput": 4011.37, "total_tokens": 6201088}
|
| 1997 |
+
{"current_steps": 9940, "total_steps": 9960, "loss": 0.0, "lr": 1.354168438255643e-10, "epoch": 19.95983935742972, "percentage": 99.8, "elapsed_time": "0:25:46", "remaining_time": "0:00:03", "throughput": 4011.54, "total_tokens": 6204672}
|
| 1998 |
+
{"current_steps": 9945, "total_steps": 9960, "loss": 0.0, "lr": 7.860947316140621e-11, "epoch": 19.96987951807229, "percentage": 99.85, "elapsed_time": "0:25:47", "remaining_time": "0:00:02", "throughput": 4011.62, "total_tokens": 6207360}
|
| 1999 |
+
{"current_steps": 9950, "total_steps": 9960, "loss": 0.0, "lr": 3.715531014025775e-11, "epoch": 19.97991967871486, "percentage": 99.9, "elapsed_time": "0:25:48", "remaining_time": "0:00:01", "throughput": 4011.78, "total_tokens": 6210368}
|
| 2000 |
+
{"current_steps": 9955, "total_steps": 9960, "loss": 0.0, "lr": 1.1054482056405136e-11, "epoch": 19.98995983935743, "percentage": 99.95, "elapsed_time": "0:25:48", "remaining_time": "0:00:00", "throughput": 4011.77, "total_tokens": 6212800}
|
| 2001 |
+
{"current_steps": 9960, "total_steps": 9960, "loss": 0.0001, "lr": 3.0706905573829603e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:49", "remaining_time": "0:00:00", "throughput": 4011.79, "total_tokens": 6215968}
|
| 2002 |
+
{"current_steps": 9960, "total_steps": 9960, "eval_loss": 1.102670431137085, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:57", "remaining_time": "0:00:00", "throughput": 3990.99, "total_tokens": 6215968}
|
| 2003 |
+
{"current_steps": 9960, "total_steps": 9960, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:58", "remaining_time": "0:00:00", "throughput": 3987.92, "total_tokens": 6215968}
|