rbelanec commited on
Commit
c8a1ffc
·
verified ·
1 Parent(s): 50d8f7f

Training in progress, step 9912

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +166 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85e863166ddb7d6d2be89c4c8927d55e48d678de9ebe443e6fd94e1338f6a32f
3
  size 541712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac0bad24df51d3d5e4d3a6d251b3d234cadc5ccdc39c486f625b6fe06d9716a
3
  size 541712
trainer_log.jsonl CHANGED
@@ -1831,3 +1831,169 @@
1831
  {"current_steps": 9100, "total_steps": 16520, "loss": 0.2946, "lr": 2.4931327930560798e-05, "epoch": 11.016949152542374, "percentage": 55.08, "elapsed_time": "0:24:07", "remaining_time": "0:19:40", "throughput": 2576.21, "total_tokens": 3728280}
1832
  {"current_steps": 9105, "total_steps": 16520, "loss": 0.237, "lr": 2.4904915705830238e-05, "epoch": 11.023002421307506, "percentage": 55.12, "elapsed_time": "0:24:07", "remaining_time": "0:19:39", "throughput": 2576.18, "total_tokens": 3730296}
1833
  {"current_steps": 9110, "total_steps": 16520, "loss": 0.1367, "lr": 2.487850358723097e-05, "epoch": 11.029055690072639, "percentage": 55.15, "elapsed_time": "0:24:08", "remaining_time": "0:19:38", "throughput": 2576.23, "total_tokens": 3732504}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1831
  {"current_steps": 9100, "total_steps": 16520, "loss": 0.2946, "lr": 2.4931327930560798e-05, "epoch": 11.016949152542374, "percentage": 55.08, "elapsed_time": "0:24:07", "remaining_time": "0:19:40", "throughput": 2576.21, "total_tokens": 3728280}
1832
  {"current_steps": 9105, "total_steps": 16520, "loss": 0.237, "lr": 2.4904915705830238e-05, "epoch": 11.023002421307506, "percentage": 55.12, "elapsed_time": "0:24:07", "remaining_time": "0:19:39", "throughput": 2576.18, "total_tokens": 3730296}
1833
  {"current_steps": 9110, "total_steps": 16520, "loss": 0.1367, "lr": 2.487850358723097e-05, "epoch": 11.029055690072639, "percentage": 55.15, "elapsed_time": "0:24:08", "remaining_time": "0:19:38", "throughput": 2576.23, "total_tokens": 3732504}
1834
+ {"current_steps": 9115, "total_steps": 16520, "loss": 0.2302, "lr": 2.4852091604243663e-05, "epoch": 11.035108958837773, "percentage": 55.18, "elapsed_time": "0:24:09", "remaining_time": "0:19:37", "throughput": 2576.33, "total_tokens": 3734616}
1835
+ {"current_steps": 9120, "total_steps": 16520, "loss": 0.1777, "lr": 2.482567978634891e-05, "epoch": 11.041162227602905, "percentage": 55.21, "elapsed_time": "0:24:10", "remaining_time": "0:19:36", "throughput": 2576.43, "total_tokens": 3736728}
1836
+ {"current_steps": 9125, "total_steps": 16520, "loss": 0.157, "lr": 2.479926816302705e-05, "epoch": 11.04721549636804, "percentage": 55.24, "elapsed_time": "0:24:11", "remaining_time": "0:19:36", "throughput": 2576.44, "total_tokens": 3738776}
1837
+ {"current_steps": 9130, "total_steps": 16520, "loss": 0.2194, "lr": 2.4772856763758252e-05, "epoch": 11.053268765133172, "percentage": 55.27, "elapsed_time": "0:24:11", "remaining_time": "0:19:35", "throughput": 2576.57, "total_tokens": 3741016}
1838
+ {"current_steps": 9135, "total_steps": 16520, "loss": 0.3534, "lr": 2.47464456180224e-05, "epoch": 11.059322033898304, "percentage": 55.3, "elapsed_time": "0:24:12", "remaining_time": "0:19:34", "throughput": 2576.7, "total_tokens": 3743096}
1839
+ {"current_steps": 9140, "total_steps": 16520, "loss": 0.2835, "lr": 2.472003475529913e-05, "epoch": 11.065375302663439, "percentage": 55.33, "elapsed_time": "0:24:13", "remaining_time": "0:19:33", "throughput": 2576.8, "total_tokens": 3745400}
1840
+ {"current_steps": 9145, "total_steps": 16520, "loss": 0.255, "lr": 2.4693624205067723e-05, "epoch": 11.071428571428571, "percentage": 55.36, "elapsed_time": "0:24:14", "remaining_time": "0:19:32", "throughput": 2576.83, "total_tokens": 3747480}
1841
+ {"current_steps": 9150, "total_steps": 16520, "loss": 0.3783, "lr": 2.466721399680716e-05, "epoch": 11.077481840193705, "percentage": 55.39, "elapsed_time": "0:24:15", "remaining_time": "0:19:31", "throughput": 2576.89, "total_tokens": 3749528}
1842
+ {"current_steps": 9155, "total_steps": 16520, "loss": 0.267, "lr": 2.464080415999598e-05, "epoch": 11.083535108958838, "percentage": 55.42, "elapsed_time": "0:24:15", "remaining_time": "0:19:31", "throughput": 2576.93, "total_tokens": 3751544}
1843
+ {"current_steps": 9160, "total_steps": 16520, "loss": 0.1631, "lr": 2.4614394724112366e-05, "epoch": 11.08958837772397, "percentage": 55.45, "elapsed_time": "0:24:16", "remaining_time": "0:19:30", "throughput": 2576.93, "total_tokens": 3753592}
1844
+ {"current_steps": 9165, "total_steps": 16520, "loss": 0.2406, "lr": 2.458798571863401e-05, "epoch": 11.095641646489105, "percentage": 55.48, "elapsed_time": "0:24:17", "remaining_time": "0:19:29", "throughput": 2577.0, "total_tokens": 3755736}
1845
+ {"current_steps": 9170, "total_steps": 16520, "loss": 0.2702, "lr": 2.4561577173038164e-05, "epoch": 11.101694915254237, "percentage": 55.51, "elapsed_time": "0:24:18", "remaining_time": "0:19:28", "throughput": 2577.08, "total_tokens": 3757720}
1846
+ {"current_steps": 9175, "total_steps": 16520, "loss": 0.2506, "lr": 2.4535169116801517e-05, "epoch": 11.107748184019371, "percentage": 55.54, "elapsed_time": "0:24:18", "remaining_time": "0:19:27", "throughput": 2577.14, "total_tokens": 3759864}
1847
+ {"current_steps": 9180, "total_steps": 16520, "loss": 0.181, "lr": 2.4508761579400248e-05, "epoch": 11.113801452784504, "percentage": 55.57, "elapsed_time": "0:24:19", "remaining_time": "0:19:27", "throughput": 2577.17, "total_tokens": 3761944}
1848
+ {"current_steps": 9185, "total_steps": 16520, "loss": 0.2833, "lr": 2.4482354590309962e-05, "epoch": 11.119854721549636, "percentage": 55.6, "elapsed_time": "0:24:20", "remaining_time": "0:19:26", "throughput": 2576.63, "total_tokens": 3763960}
1849
+ {"current_steps": 9190, "total_steps": 16520, "loss": 0.212, "lr": 2.4455948179005613e-05, "epoch": 11.12590799031477, "percentage": 55.63, "elapsed_time": "0:24:21", "remaining_time": "0:19:25", "throughput": 2576.68, "total_tokens": 3766168}
1850
+ {"current_steps": 9195, "total_steps": 16520, "loss": 0.3087, "lr": 2.4429542374961552e-05, "epoch": 11.131961259079903, "percentage": 55.66, "elapsed_time": "0:24:22", "remaining_time": "0:19:25", "throughput": 2576.72, "total_tokens": 3768280}
1851
+ {"current_steps": 9200, "total_steps": 16520, "loss": 0.1495, "lr": 2.440313720765142e-05, "epoch": 11.138014527845037, "percentage": 55.69, "elapsed_time": "0:24:23", "remaining_time": "0:19:24", "throughput": 2576.79, "total_tokens": 3770360}
1852
+ {"current_steps": 9205, "total_steps": 16520, "loss": 0.1929, "lr": 2.4376732706548183e-05, "epoch": 11.14406779661017, "percentage": 55.72, "elapsed_time": "0:24:23", "remaining_time": "0:19:23", "throughput": 2576.87, "total_tokens": 3772280}
1853
+ {"current_steps": 9210, "total_steps": 16520, "loss": 0.1942, "lr": 2.435032890112402e-05, "epoch": 11.150121065375302, "percentage": 55.75, "elapsed_time": "0:24:24", "remaining_time": "0:19:22", "throughput": 2576.9, "total_tokens": 3774456}
1854
+ {"current_steps": 9215, "total_steps": 16520, "loss": 0.297, "lr": 2.432392582085037e-05, "epoch": 11.156174334140436, "percentage": 55.78, "elapsed_time": "0:24:25", "remaining_time": "0:19:21", "throughput": 2576.96, "total_tokens": 3776600}
1855
+ {"current_steps": 9220, "total_steps": 16520, "loss": 0.2473, "lr": 2.4297523495197845e-05, "epoch": 11.162227602905569, "percentage": 55.81, "elapsed_time": "0:24:26", "remaining_time": "0:19:20", "throughput": 2577.0, "total_tokens": 3778808}
1856
+ {"current_steps": 9225, "total_steps": 16520, "loss": 0.2929, "lr": 2.427112195363622e-05, "epoch": 11.168280871670703, "percentage": 55.84, "elapsed_time": "0:24:27", "remaining_time": "0:19:20", "throughput": 2577.02, "total_tokens": 3780888}
1857
+ {"current_steps": 9230, "total_steps": 16520, "loss": 0.2248, "lr": 2.4244721225634384e-05, "epoch": 11.174334140435835, "percentage": 55.87, "elapsed_time": "0:24:27", "remaining_time": "0:19:19", "throughput": 2577.07, "total_tokens": 3783000}
1858
+ {"current_steps": 9235, "total_steps": 16520, "loss": 0.2262, "lr": 2.421832134066034e-05, "epoch": 11.180387409200968, "percentage": 55.9, "elapsed_time": "0:24:28", "remaining_time": "0:19:18", "throughput": 2577.16, "total_tokens": 3785016}
1859
+ {"current_steps": 9240, "total_steps": 16520, "loss": 0.1884, "lr": 2.4191922328181118e-05, "epoch": 11.186440677966102, "percentage": 55.93, "elapsed_time": "0:24:29", "remaining_time": "0:19:17", "throughput": 2577.23, "total_tokens": 3787000}
1860
+ {"current_steps": 9245, "total_steps": 16520, "loss": 0.1987, "lr": 2.4165524217662822e-05, "epoch": 11.192493946731235, "percentage": 55.96, "elapsed_time": "0:24:30", "remaining_time": "0:19:16", "throughput": 2577.31, "total_tokens": 3789080}
1861
+ {"current_steps": 9250, "total_steps": 16520, "loss": 0.2583, "lr": 2.4139127038570504e-05, "epoch": 11.198547215496369, "percentage": 55.99, "elapsed_time": "0:24:30", "remaining_time": "0:19:16", "throughput": 2577.29, "total_tokens": 3791192}
1862
+ {"current_steps": 9255, "total_steps": 16520, "loss": 0.3303, "lr": 2.4112730820368176e-05, "epoch": 11.204600484261501, "percentage": 56.02, "elapsed_time": "0:24:31", "remaining_time": "0:19:15", "throughput": 2577.34, "total_tokens": 3793304}
1863
+ {"current_steps": 9260, "total_steps": 16520, "loss": 0.2677, "lr": 2.4086335592518817e-05, "epoch": 11.210653753026634, "percentage": 56.05, "elapsed_time": "0:24:32", "remaining_time": "0:19:14", "throughput": 2577.38, "total_tokens": 3795320}
1864
+ {"current_steps": 9265, "total_steps": 16520, "loss": 0.227, "lr": 2.4059941384484266e-05, "epoch": 11.216707021791768, "percentage": 56.08, "elapsed_time": "0:24:33", "remaining_time": "0:19:13", "throughput": 2577.43, "total_tokens": 3797432}
1865
+ {"current_steps": 9270, "total_steps": 16520, "loss": 0.2638, "lr": 2.4033548225725252e-05, "epoch": 11.2227602905569, "percentage": 56.11, "elapsed_time": "0:24:34", "remaining_time": "0:19:12", "throughput": 2577.46, "total_tokens": 3799608}
1866
+ {"current_steps": 9275, "total_steps": 16520, "loss": 0.2378, "lr": 2.4007156145701294e-05, "epoch": 11.228813559322035, "percentage": 56.14, "elapsed_time": "0:24:34", "remaining_time": "0:19:12", "throughput": 2577.47, "total_tokens": 3801656}
1867
+ {"current_steps": 9280, "total_steps": 16520, "loss": 0.2361, "lr": 2.3980765173870745e-05, "epoch": 11.234866828087167, "percentage": 56.17, "elapsed_time": "0:24:35", "remaining_time": "0:19:11", "throughput": 2577.53, "total_tokens": 3803704}
1868
+ {"current_steps": 9285, "total_steps": 16520, "loss": 0.3316, "lr": 2.3954375339690692e-05, "epoch": 11.2409200968523, "percentage": 56.2, "elapsed_time": "0:24:36", "remaining_time": "0:19:10", "throughput": 2577.54, "total_tokens": 3805592}
1869
+ {"current_steps": 9290, "total_steps": 16520, "loss": 0.3042, "lr": 2.3927986672616985e-05, "epoch": 11.246973365617434, "percentage": 56.23, "elapsed_time": "0:24:37", "remaining_time": "0:19:09", "throughput": 2577.59, "total_tokens": 3807608}
1870
+ {"current_steps": 9295, "total_steps": 16520, "loss": 0.1662, "lr": 2.3901599202104127e-05, "epoch": 11.253026634382566, "percentage": 56.27, "elapsed_time": "0:24:37", "remaining_time": "0:19:08", "throughput": 2577.66, "total_tokens": 3809592}
1871
+ {"current_steps": 9300, "total_steps": 16520, "loss": 0.2321, "lr": 2.387521295760533e-05, "epoch": 11.2590799031477, "percentage": 56.3, "elapsed_time": "0:24:38", "remaining_time": "0:19:07", "throughput": 2577.71, "total_tokens": 3811640}
1872
+ {"current_steps": 9305, "total_steps": 16520, "loss": 0.1614, "lr": 2.3848827968572405e-05, "epoch": 11.265133171912833, "percentage": 56.33, "elapsed_time": "0:24:39", "remaining_time": "0:19:07", "throughput": 2577.85, "total_tokens": 3813720}
1873
+ {"current_steps": 9310, "total_steps": 16520, "loss": 0.1804, "lr": 2.3822444264455793e-05, "epoch": 11.271186440677965, "percentage": 56.36, "elapsed_time": "0:24:40", "remaining_time": "0:19:06", "throughput": 2577.97, "total_tokens": 3815832}
1874
+ {"current_steps": 9315, "total_steps": 16520, "loss": 0.1604, "lr": 2.3796061874704454e-05, "epoch": 11.2772397094431, "percentage": 56.39, "elapsed_time": "0:24:40", "remaining_time": "0:19:05", "throughput": 2577.97, "total_tokens": 3817880}
1875
+ {"current_steps": 9320, "total_steps": 16520, "loss": 0.155, "lr": 2.3769680828765932e-05, "epoch": 11.283292978208232, "percentage": 56.42, "elapsed_time": "0:24:41", "remaining_time": "0:19:04", "throughput": 2578.03, "total_tokens": 3819928}
1876
+ {"current_steps": 9325, "total_steps": 16520, "loss": 0.2703, "lr": 2.3743301156086244e-05, "epoch": 11.289346246973366, "percentage": 56.45, "elapsed_time": "0:24:42", "remaining_time": "0:19:03", "throughput": 2578.07, "total_tokens": 3822040}
1877
+ {"current_steps": 9330, "total_steps": 16520, "loss": 0.1653, "lr": 2.3716922886109864e-05, "epoch": 11.295399515738499, "percentage": 56.48, "elapsed_time": "0:24:43", "remaining_time": "0:19:03", "throughput": 2578.11, "total_tokens": 3824056}
1878
+ {"current_steps": 9335, "total_steps": 16520, "loss": 0.1668, "lr": 2.3690546048279728e-05, "epoch": 11.301452784503631, "percentage": 56.51, "elapsed_time": "0:24:44", "remaining_time": "0:19:02", "throughput": 2578.17, "total_tokens": 3826168}
1879
+ {"current_steps": 9340, "total_steps": 16520, "loss": 0.15, "lr": 2.3664170672037154e-05, "epoch": 11.307506053268765, "percentage": 56.54, "elapsed_time": "0:24:44", "remaining_time": "0:19:01", "throughput": 2578.25, "total_tokens": 3828440}
1880
+ {"current_steps": 9345, "total_steps": 16520, "loss": 0.2056, "lr": 2.363779678682185e-05, "epoch": 11.313559322033898, "percentage": 56.57, "elapsed_time": "0:24:45", "remaining_time": "0:19:00", "throughput": 2578.36, "total_tokens": 3830392}
1881
+ {"current_steps": 9350, "total_steps": 16520, "loss": 0.2826, "lr": 2.3611424422071816e-05, "epoch": 11.319612590799032, "percentage": 56.6, "elapsed_time": "0:24:46", "remaining_time": "0:18:59", "throughput": 2578.4, "total_tokens": 3832408}
1882
+ {"current_steps": 9355, "total_steps": 16520, "loss": 0.2071, "lr": 2.358505360722341e-05, "epoch": 11.325665859564165, "percentage": 56.63, "elapsed_time": "0:24:47", "remaining_time": "0:18:58", "throughput": 2578.52, "total_tokens": 3834488}
1883
+ {"current_steps": 9360, "total_steps": 16520, "loss": 0.1885, "lr": 2.3558684371711215e-05, "epoch": 11.331719128329297, "percentage": 56.66, "elapsed_time": "0:24:47", "remaining_time": "0:18:58", "throughput": 2578.56, "total_tokens": 3836504}
1884
+ {"current_steps": 9365, "total_steps": 16520, "loss": 0.1262, "lr": 2.3532316744968088e-05, "epoch": 11.337772397094431, "percentage": 56.69, "elapsed_time": "0:24:48", "remaining_time": "0:18:57", "throughput": 2578.67, "total_tokens": 3838648}
1885
+ {"current_steps": 9370, "total_steps": 16520, "loss": 0.1869, "lr": 2.3505950756425052e-05, "epoch": 11.343825665859564, "percentage": 56.72, "elapsed_time": "0:24:49", "remaining_time": "0:18:56", "throughput": 2578.65, "total_tokens": 3840728}
1886
+ {"current_steps": 9375, "total_steps": 16520, "loss": 0.2304, "lr": 2.3479586435511337e-05, "epoch": 11.349878934624698, "percentage": 56.75, "elapsed_time": "0:24:50", "remaining_time": "0:18:55", "throughput": 2578.68, "total_tokens": 3842648}
1887
+ {"current_steps": 9380, "total_steps": 16520, "loss": 0.2994, "lr": 2.3453223811654282e-05, "epoch": 11.35593220338983, "percentage": 56.78, "elapsed_time": "0:24:50", "remaining_time": "0:18:54", "throughput": 2578.75, "total_tokens": 3844632}
1888
+ {"current_steps": 9385, "total_steps": 16520, "loss": 0.2831, "lr": 2.3426862914279364e-05, "epoch": 11.361985472154963, "percentage": 56.81, "elapsed_time": "0:24:51", "remaining_time": "0:18:54", "throughput": 2578.83, "total_tokens": 3846616}
1889
+ {"current_steps": 9390, "total_steps": 16520, "loss": 0.1108, "lr": 2.340050377281009e-05, "epoch": 11.368038740920097, "percentage": 56.84, "elapsed_time": "0:24:52", "remaining_time": "0:18:53", "throughput": 2578.83, "total_tokens": 3848568}
1890
+ {"current_steps": 9395, "total_steps": 16520, "loss": 0.2087, "lr": 2.3374146416668063e-05, "epoch": 11.37409200968523, "percentage": 56.87, "elapsed_time": "0:24:53", "remaining_time": "0:18:52", "throughput": 2578.88, "total_tokens": 3850680}
1891
+ {"current_steps": 9400, "total_steps": 16520, "loss": 0.3217, "lr": 2.3347790875272822e-05, "epoch": 11.380145278450364, "percentage": 56.9, "elapsed_time": "0:24:53", "remaining_time": "0:18:51", "throughput": 2578.99, "total_tokens": 3852760}
1892
+ {"current_steps": 9405, "total_steps": 16520, "loss": 0.3579, "lr": 2.3321437178041966e-05, "epoch": 11.386198547215496, "percentage": 56.93, "elapsed_time": "0:24:54", "remaining_time": "0:18:50", "throughput": 2579.02, "total_tokens": 3854744}
1893
+ {"current_steps": 9410, "total_steps": 16520, "loss": 0.1418, "lr": 2.3295085354390964e-05, "epoch": 11.392251815980629, "percentage": 56.96, "elapsed_time": "0:24:55", "remaining_time": "0:18:49", "throughput": 2579.06, "total_tokens": 3856920}
1894
+ {"current_steps": 9415, "total_steps": 16520, "loss": 0.2682, "lr": 2.326873543373321e-05, "epoch": 11.398305084745763, "percentage": 56.99, "elapsed_time": "0:24:56", "remaining_time": "0:18:49", "throughput": 2579.13, "total_tokens": 3859096}
1895
+ {"current_steps": 9420, "total_steps": 16520, "loss": 0.2371, "lr": 2.324238744548e-05, "epoch": 11.404358353510895, "percentage": 57.02, "elapsed_time": "0:24:56", "remaining_time": "0:18:48", "throughput": 2579.23, "total_tokens": 3861016}
1896
+ {"current_steps": 9425, "total_steps": 16520, "loss": 0.1922, "lr": 2.3216041419040455e-05, "epoch": 11.41041162227603, "percentage": 57.05, "elapsed_time": "0:24:57", "remaining_time": "0:18:47", "throughput": 2579.27, "total_tokens": 3863128}
1897
+ {"current_steps": 9430, "total_steps": 16520, "loss": 0.2147, "lr": 2.3189697383821512e-05, "epoch": 11.416464891041162, "percentage": 57.08, "elapsed_time": "0:24:58", "remaining_time": "0:18:46", "throughput": 2579.33, "total_tokens": 3865176}
1898
+ {"current_steps": 9435, "total_steps": 16520, "loss": 0.1497, "lr": 2.3163355369227865e-05, "epoch": 11.422518159806295, "percentage": 57.11, "elapsed_time": "0:24:59", "remaining_time": "0:18:45", "throughput": 2579.38, "total_tokens": 3867192}
1899
+ {"current_steps": 9440, "total_steps": 16520, "loss": 0.195, "lr": 2.3137015404661988e-05, "epoch": 11.428571428571429, "percentage": 57.14, "elapsed_time": "0:25:00", "remaining_time": "0:18:45", "throughput": 2579.43, "total_tokens": 3869336}
1900
+ {"current_steps": 9445, "total_steps": 16520, "loss": 0.3264, "lr": 2.3110677519524032e-05, "epoch": 11.434624697336561, "percentage": 57.17, "elapsed_time": "0:25:00", "remaining_time": "0:18:44", "throughput": 2579.39, "total_tokens": 3871256}
1901
+ {"current_steps": 9450, "total_steps": 16520, "loss": 0.2384, "lr": 2.308434174321186e-05, "epoch": 11.440677966101696, "percentage": 57.2, "elapsed_time": "0:25:01", "remaining_time": "0:18:43", "throughput": 2579.47, "total_tokens": 3873240}
1902
+ {"current_steps": 9455, "total_steps": 16520, "loss": 0.2153, "lr": 2.3058008105120946e-05, "epoch": 11.446731234866828, "percentage": 57.23, "elapsed_time": "0:25:02", "remaining_time": "0:18:42", "throughput": 2579.52, "total_tokens": 3875256}
1903
+ {"current_steps": 9460, "total_steps": 16520, "loss": 0.1665, "lr": 2.3031676634644406e-05, "epoch": 11.45278450363196, "percentage": 57.26, "elapsed_time": "0:25:03", "remaining_time": "0:18:41", "throughput": 2579.53, "total_tokens": 3877400}
1904
+ {"current_steps": 9465, "total_steps": 16520, "loss": 0.2576, "lr": 2.300534736117292e-05, "epoch": 11.458837772397095, "percentage": 57.29, "elapsed_time": "0:25:03", "remaining_time": "0:18:40", "throughput": 2579.65, "total_tokens": 3879448}
1905
+ {"current_steps": 9470, "total_steps": 16520, "loss": 0.1711, "lr": 2.2979020314094738e-05, "epoch": 11.464891041162227, "percentage": 57.32, "elapsed_time": "0:25:04", "remaining_time": "0:18:40", "throughput": 2579.72, "total_tokens": 3881496}
1906
+ {"current_steps": 9475, "total_steps": 16520, "loss": 0.1834, "lr": 2.2952695522795583e-05, "epoch": 11.470944309927361, "percentage": 57.35, "elapsed_time": "0:25:05", "remaining_time": "0:18:39", "throughput": 2579.8, "total_tokens": 3883480}
1907
+ {"current_steps": 9480, "total_steps": 16520, "loss": 0.2376, "lr": 2.2926373016658703e-05, "epoch": 11.476997578692494, "percentage": 57.38, "elapsed_time": "0:25:06", "remaining_time": "0:18:38", "throughput": 2579.94, "total_tokens": 3885624}
1908
+ {"current_steps": 9485, "total_steps": 16520, "loss": 0.1741, "lr": 2.2900052825064782e-05, "epoch": 11.483050847457626, "percentage": 57.42, "elapsed_time": "0:25:06", "remaining_time": "0:18:37", "throughput": 2579.98, "total_tokens": 3887704}
1909
+ {"current_steps": 9490, "total_steps": 16520, "loss": 0.228, "lr": 2.2873734977391898e-05, "epoch": 11.48910411622276, "percentage": 57.45, "elapsed_time": "0:25:07", "remaining_time": "0:18:36", "throughput": 2580.0, "total_tokens": 3889784}
1910
+ {"current_steps": 9495, "total_steps": 16520, "loss": 0.272, "lr": 2.2847419503015543e-05, "epoch": 11.495157384987893, "percentage": 57.48, "elapsed_time": "0:25:08", "remaining_time": "0:18:36", "throughput": 2580.1, "total_tokens": 3891800}
1911
+ {"current_steps": 9500, "total_steps": 16520, "loss": 0.188, "lr": 2.2821106431308544e-05, "epoch": 11.501210653753027, "percentage": 57.51, "elapsed_time": "0:25:09", "remaining_time": "0:18:35", "throughput": 2580.17, "total_tokens": 3893880}
1912
+ {"current_steps": 9505, "total_steps": 16520, "loss": 0.223, "lr": 2.2794795791641065e-05, "epoch": 11.50726392251816, "percentage": 57.54, "elapsed_time": "0:25:09", "remaining_time": "0:18:34", "throughput": 2580.25, "total_tokens": 3896056}
1913
+ {"current_steps": 9510, "total_steps": 16520, "loss": 0.1798, "lr": 2.276848761338052e-05, "epoch": 11.513317191283292, "percentage": 57.57, "elapsed_time": "0:25:10", "remaining_time": "0:18:33", "throughput": 2580.34, "total_tokens": 3898072}
1914
+ {"current_steps": 9515, "total_steps": 16520, "loss": 0.1699, "lr": 2.2742181925891608e-05, "epoch": 11.519370460048426, "percentage": 57.6, "elapsed_time": "0:25:11", "remaining_time": "0:18:32", "throughput": 2580.4, "total_tokens": 3900216}
1915
+ {"current_steps": 9520, "total_steps": 16520, "loss": 0.1855, "lr": 2.2715878758536236e-05, "epoch": 11.525423728813559, "percentage": 57.63, "elapsed_time": "0:25:12", "remaining_time": "0:18:31", "throughput": 2580.45, "total_tokens": 3902424}
1916
+ {"current_steps": 9525, "total_steps": 16520, "loss": 0.2227, "lr": 2.26895781406735e-05, "epoch": 11.531476997578693, "percentage": 57.66, "elapsed_time": "0:25:13", "remaining_time": "0:18:31", "throughput": 2580.54, "total_tokens": 3904504}
1917
+ {"current_steps": 9530, "total_steps": 16520, "loss": 0.1592, "lr": 2.2663280101659643e-05, "epoch": 11.537530266343826, "percentage": 57.69, "elapsed_time": "0:25:13", "remaining_time": "0:18:30", "throughput": 2580.55, "total_tokens": 3906552}
1918
+ {"current_steps": 9535, "total_steps": 16520, "loss": 0.1253, "lr": 2.2636984670848044e-05, "epoch": 11.543583535108958, "percentage": 57.72, "elapsed_time": "0:25:14", "remaining_time": "0:18:29", "throughput": 2580.62, "total_tokens": 3908600}
1919
+ {"current_steps": 9540, "total_steps": 16520, "loss": 0.1838, "lr": 2.2610691877589145e-05, "epoch": 11.549636803874092, "percentage": 57.75, "elapsed_time": "0:25:15", "remaining_time": "0:18:28", "throughput": 2580.67, "total_tokens": 3910712}
1920
+ {"current_steps": 9545, "total_steps": 16520, "loss": 0.1617, "lr": 2.258440175123048e-05, "epoch": 11.555690072639225, "percentage": 57.78, "elapsed_time": "0:25:16", "remaining_time": "0:18:27", "throughput": 2580.77, "total_tokens": 3912824}
1921
+ {"current_steps": 9550, "total_steps": 16520, "loss": 0.2033, "lr": 2.255811432111658e-05, "epoch": 11.561743341404359, "percentage": 57.81, "elapsed_time": "0:25:16", "remaining_time": "0:18:27", "throughput": 2580.8, "total_tokens": 3914808}
1922
+ {"current_steps": 9555, "total_steps": 16520, "loss": 0.1959, "lr": 2.2531829616588977e-05, "epoch": 11.567796610169491, "percentage": 57.84, "elapsed_time": "0:25:17", "remaining_time": "0:18:26", "throughput": 2580.88, "total_tokens": 3916888}
1923
+ {"current_steps": 9560, "total_steps": 16520, "loss": 0.2298, "lr": 2.2505547666986145e-05, "epoch": 11.573849878934624, "percentage": 57.87, "elapsed_time": "0:25:18", "remaining_time": "0:18:25", "throughput": 2580.95, "total_tokens": 3918872}
1924
+ {"current_steps": 9565, "total_steps": 16520, "loss": 0.3209, "lr": 2.2479268501643512e-05, "epoch": 11.579903147699758, "percentage": 57.9, "elapsed_time": "0:25:19", "remaining_time": "0:18:24", "throughput": 2581.05, "total_tokens": 3920728}
1925
+ {"current_steps": 9570, "total_steps": 16520, "loss": 0.2438, "lr": 2.245299214989338e-05, "epoch": 11.58595641646489, "percentage": 57.93, "elapsed_time": "0:25:19", "remaining_time": "0:18:23", "throughput": 2581.16, "total_tokens": 3922776}
1926
+ {"current_steps": 9575, "total_steps": 16520, "loss": 0.1821, "lr": 2.24267186410649e-05, "epoch": 11.592009685230025, "percentage": 57.96, "elapsed_time": "0:25:20", "remaining_time": "0:18:22", "throughput": 2581.17, "total_tokens": 3924824}
1927
+ {"current_steps": 9580, "total_steps": 16520, "loss": 0.2948, "lr": 2.240044800448407e-05, "epoch": 11.598062953995157, "percentage": 57.99, "elapsed_time": "0:25:21", "remaining_time": "0:18:22", "throughput": 2581.19, "total_tokens": 3926904}
1928
+ {"current_steps": 9585, "total_steps": 16520, "loss": 0.1912, "lr": 2.2374180269473675e-05, "epoch": 11.60411622276029, "percentage": 58.02, "elapsed_time": "0:25:22", "remaining_time": "0:18:21", "throughput": 2581.27, "total_tokens": 3928888}
1929
+ {"current_steps": 9590, "total_steps": 16520, "loss": 0.2554, "lr": 2.2347915465353268e-05, "epoch": 11.610169491525424, "percentage": 58.05, "elapsed_time": "0:25:22", "remaining_time": "0:18:20", "throughput": 2581.32, "total_tokens": 3931000}
1930
+ {"current_steps": 9595, "total_steps": 16520, "loss": 0.3728, "lr": 2.2321653621439103e-05, "epoch": 11.616222760290556, "percentage": 58.08, "elapsed_time": "0:25:23", "remaining_time": "0:18:19", "throughput": 2581.39, "total_tokens": 3932888}
1931
+ {"current_steps": 9600, "total_steps": 16520, "loss": 0.3145, "lr": 2.2295394767044167e-05, "epoch": 11.62227602905569, "percentage": 58.11, "elapsed_time": "0:25:24", "remaining_time": "0:18:18", "throughput": 2581.41, "total_tokens": 3934968}
1932
+ {"current_steps": 9605, "total_steps": 16520, "loss": 0.1992, "lr": 2.2269138931478084e-05, "epoch": 11.628329297820823, "percentage": 58.14, "elapsed_time": "0:25:25", "remaining_time": "0:18:17", "throughput": 2581.47, "total_tokens": 3937016}
1933
+ {"current_steps": 9610, "total_steps": 16520, "loss": 0.2523, "lr": 2.2242886144047133e-05, "epoch": 11.634382566585955, "percentage": 58.17, "elapsed_time": "0:25:25", "remaining_time": "0:18:17", "throughput": 2581.55, "total_tokens": 3939192}
1934
+ {"current_steps": 9615, "total_steps": 16520, "loss": 0.2223, "lr": 2.221663643405415e-05, "epoch": 11.64043583535109, "percentage": 58.2, "elapsed_time": "0:25:26", "remaining_time": "0:18:16", "throughput": 2581.61, "total_tokens": 3941144}
1935
+ {"current_steps": 9620, "total_steps": 16520, "loss": 0.2001, "lr": 2.2190389830798585e-05, "epoch": 11.646489104116222, "percentage": 58.23, "elapsed_time": "0:25:27", "remaining_time": "0:18:15", "throughput": 2581.6, "total_tokens": 3943160}
1936
+ {"current_steps": 9625, "total_steps": 16520, "loss": 0.2614, "lr": 2.2164146363576383e-05, "epoch": 11.652542372881356, "percentage": 58.26, "elapsed_time": "0:25:28", "remaining_time": "0:18:14", "throughput": 2581.65, "total_tokens": 3945304}
1937
+ {"current_steps": 9630, "total_steps": 16520, "loss": 0.195, "lr": 2.2137906061680018e-05, "epoch": 11.658595641646489, "percentage": 58.29, "elapsed_time": "0:25:29", "remaining_time": "0:18:13", "throughput": 2581.7, "total_tokens": 3947512}
1938
+ {"current_steps": 9635, "total_steps": 16520, "loss": 0.1799, "lr": 2.211166895439839e-05, "epoch": 11.664648910411623, "percentage": 58.32, "elapsed_time": "0:25:29", "remaining_time": "0:18:13", "throughput": 2581.72, "total_tokens": 3949688}
1939
+ {"current_steps": 9640, "total_steps": 16520, "loss": 0.3017, "lr": 2.208543507101688e-05, "epoch": 11.670702179176756, "percentage": 58.35, "elapsed_time": "0:25:30", "remaining_time": "0:18:12", "throughput": 2581.77, "total_tokens": 3951640}
1940
+ {"current_steps": 9645, "total_steps": 16520, "loss": 0.3456, "lr": 2.2059204440817245e-05, "epoch": 11.676755447941888, "percentage": 58.38, "elapsed_time": "0:25:31", "remaining_time": "0:18:11", "throughput": 2581.8, "total_tokens": 3953752}
1941
+ {"current_steps": 9650, "total_steps": 16520, "loss": 0.3116, "lr": 2.2032977093077602e-05, "epoch": 11.682808716707022, "percentage": 58.41, "elapsed_time": "0:25:32", "remaining_time": "0:18:10", "throughput": 2581.87, "total_tokens": 3955736}
1942
+ {"current_steps": 9655, "total_steps": 16520, "loss": 0.2457, "lr": 2.2006753057072435e-05, "epoch": 11.688861985472155, "percentage": 58.44, "elapsed_time": "0:25:32", "remaining_time": "0:18:09", "throughput": 2581.97, "total_tokens": 3957784}
1943
+ {"current_steps": 9660, "total_steps": 16520, "loss": 0.1894, "lr": 2.19805323620725e-05, "epoch": 11.694915254237289, "percentage": 58.47, "elapsed_time": "0:25:33", "remaining_time": "0:18:09", "throughput": 2582.01, "total_tokens": 3959896}
1944
+ {"current_steps": 9665, "total_steps": 16520, "loss": 0.267, "lr": 2.195431503734485e-05, "epoch": 11.700968523002421, "percentage": 58.5, "elapsed_time": "0:25:34", "remaining_time": "0:18:08", "throughput": 2582.12, "total_tokens": 3961848}
1945
+ {"current_steps": 9670, "total_steps": 16520, "loss": 0.2022, "lr": 2.1928101112152746e-05, "epoch": 11.707021791767554, "percentage": 58.54, "elapsed_time": "0:25:35", "remaining_time": "0:18:07", "throughput": 2582.22, "total_tokens": 3963896}
1946
+ {"current_steps": 9675, "total_steps": 16520, "loss": 0.2453, "lr": 2.1901890615755694e-05, "epoch": 11.713075060532688, "percentage": 58.57, "elapsed_time": "0:25:35", "remaining_time": "0:18:06", "throughput": 2582.27, "total_tokens": 3965848}
1947
+ {"current_steps": 9680, "total_steps": 16520, "loss": 0.3514, "lr": 2.1875683577409327e-05, "epoch": 11.71912832929782, "percentage": 58.6, "elapsed_time": "0:25:36", "remaining_time": "0:18:05", "throughput": 2582.33, "total_tokens": 3967928}
1948
+ {"current_steps": 9685, "total_steps": 16520, "loss": 0.1857, "lr": 2.1849480026365462e-05, "epoch": 11.725181598062955, "percentage": 58.63, "elapsed_time": "0:25:37", "remaining_time": "0:18:04", "throughput": 2582.37, "total_tokens": 3969944}
1949
+ {"current_steps": 9690, "total_steps": 16520, "loss": 0.3599, "lr": 2.182327999187199e-05, "epoch": 11.731234866828087, "percentage": 58.66, "elapsed_time": "0:25:38", "remaining_time": "0:18:04", "throughput": 2582.49, "total_tokens": 3971928}
1950
+ {"current_steps": 9695, "total_steps": 16520, "loss": 0.2957, "lr": 2.179708350317291e-05, "epoch": 11.73728813559322, "percentage": 58.69, "elapsed_time": "0:25:38", "remaining_time": "0:18:03", "throughput": 2582.59, "total_tokens": 3973784}
1951
+ {"current_steps": 9700, "total_steps": 16520, "loss": 0.2528, "lr": 2.177089058950822e-05, "epoch": 11.743341404358354, "percentage": 58.72, "elapsed_time": "0:25:39", "remaining_time": "0:18:02", "throughput": 2582.7, "total_tokens": 3975736}
1952
+ {"current_steps": 9705, "total_steps": 16520, "loss": 0.1363, "lr": 2.1744701280113963e-05, "epoch": 11.749394673123486, "percentage": 58.75, "elapsed_time": "0:25:40", "remaining_time": "0:18:01", "throughput": 2582.73, "total_tokens": 3977912}
1953
+ {"current_steps": 9710, "total_steps": 16520, "loss": 0.294, "lr": 2.1718515604222144e-05, "epoch": 11.75544794188862, "percentage": 58.78, "elapsed_time": "0:25:40", "remaining_time": "0:18:00", "throughput": 2582.79, "total_tokens": 3979864}
1954
+ {"current_steps": 9715, "total_steps": 16520, "loss": 0.1894, "lr": 2.169233359106073e-05, "epoch": 11.761501210653753, "percentage": 58.81, "elapsed_time": "0:25:41", "remaining_time": "0:17:59", "throughput": 2582.83, "total_tokens": 3981880}
1955
+ {"current_steps": 9720, "total_steps": 16520, "loss": 0.2547, "lr": 2.1666155269853567e-05, "epoch": 11.767554479418886, "percentage": 58.84, "elapsed_time": "0:25:42", "remaining_time": "0:17:59", "throughput": 2582.87, "total_tokens": 3983896}
1956
+ {"current_steps": 9725, "total_steps": 16520, "loss": 0.2768, "lr": 2.1639980669820402e-05, "epoch": 11.77360774818402, "percentage": 58.87, "elapsed_time": "0:25:43", "remaining_time": "0:17:58", "throughput": 2582.91, "total_tokens": 3985816}
1957
+ {"current_steps": 9730, "total_steps": 16520, "loss": 0.1561, "lr": 2.1613809820176837e-05, "epoch": 11.779661016949152, "percentage": 58.9, "elapsed_time": "0:25:43", "remaining_time": "0:17:57", "throughput": 2582.97, "total_tokens": 3987864}
1958
+ {"current_steps": 9735, "total_steps": 16520, "loss": 0.2181, "lr": 2.1587642750134256e-05, "epoch": 11.785714285714286, "percentage": 58.93, "elapsed_time": "0:25:44", "remaining_time": "0:17:56", "throughput": 2582.98, "total_tokens": 3989752}
1959
+ {"current_steps": 9740, "total_steps": 16520, "loss": 0.1971, "lr": 2.1561479488899868e-05, "epoch": 11.791767554479419, "percentage": 58.96, "elapsed_time": "0:25:45", "remaining_time": "0:17:55", "throughput": 2583.05, "total_tokens": 3991832}
1960
+ {"current_steps": 9745, "total_steps": 16520, "loss": 0.2889, "lr": 2.153532006567658e-05, "epoch": 11.797820823244551, "percentage": 58.99, "elapsed_time": "0:25:46", "remaining_time": "0:17:54", "throughput": 2583.04, "total_tokens": 3993688}
1961
+ {"current_steps": 9750, "total_steps": 16520, "loss": 0.1407, "lr": 2.150916450966307e-05, "epoch": 11.803874092009686, "percentage": 59.02, "elapsed_time": "0:25:46", "remaining_time": "0:17:54", "throughput": 2583.08, "total_tokens": 3995800}
1962
+ {"current_steps": 9755, "total_steps": 16520, "loss": 0.2684, "lr": 2.1483012850053653e-05, "epoch": 11.809927360774818, "percentage": 59.05, "elapsed_time": "0:25:47", "remaining_time": "0:17:53", "throughput": 2583.12, "total_tokens": 3997848}
1963
+ {"current_steps": 9760, "total_steps": 16520, "loss": 0.2831, "lr": 2.1456865116038322e-05, "epoch": 11.815980629539952, "percentage": 59.08, "elapsed_time": "0:25:48", "remaining_time": "0:17:52", "throughput": 2583.19, "total_tokens": 3999832}
1964
+ {"current_steps": 9765, "total_steps": 16520, "loss": 0.3538, "lr": 2.1430721336802667e-05, "epoch": 11.822033898305085, "percentage": 59.11, "elapsed_time": "0:25:49", "remaining_time": "0:17:51", "throughput": 2583.24, "total_tokens": 4001848}
1965
+ {"current_steps": 9770, "total_steps": 16520, "loss": 0.2188, "lr": 2.1404581541527902e-05, "epoch": 11.828087167070217, "percentage": 59.14, "elapsed_time": "0:25:49", "remaining_time": "0:17:50", "throughput": 2583.31, "total_tokens": 4003928}
1966
+ {"current_steps": 9775, "total_steps": 16520, "loss": 0.257, "lr": 2.1378445759390738e-05, "epoch": 11.834140435835351, "percentage": 59.17, "elapsed_time": "0:25:50", "remaining_time": "0:17:50", "throughput": 2583.37, "total_tokens": 4005976}
1967
+ {"current_steps": 9780, "total_steps": 16520, "loss": 0.2791, "lr": 2.135231401956346e-05, "epoch": 11.840193704600484, "percentage": 59.2, "elapsed_time": "0:25:51", "remaining_time": "0:17:49", "throughput": 2583.45, "total_tokens": 4007896}
1968
+ {"current_steps": 9785, "total_steps": 16520, "loss": 0.2347, "lr": 2.1326186351213807e-05, "epoch": 11.846246973365618, "percentage": 59.23, "elapsed_time": "0:25:52", "remaining_time": "0:17:48", "throughput": 2583.53, "total_tokens": 4009880}
1969
+ {"current_steps": 9790, "total_steps": 16520, "loss": 0.1754, "lr": 2.1300062783504994e-05, "epoch": 11.85230024213075, "percentage": 59.26, "elapsed_time": "0:25:52", "remaining_time": "0:17:47", "throughput": 2583.55, "total_tokens": 4011960}
1970
+ {"current_steps": 9795, "total_steps": 16520, "loss": 0.1225, "lr": 2.1273943345595637e-05, "epoch": 11.858353510895883, "percentage": 59.29, "elapsed_time": "0:25:53", "remaining_time": "0:17:46", "throughput": 2583.62, "total_tokens": 4014104}
1971
+ {"current_steps": 9800, "total_steps": 16520, "loss": 0.4161, "lr": 2.1247828066639768e-05, "epoch": 11.864406779661017, "percentage": 59.32, "elapsed_time": "0:25:54", "remaining_time": "0:17:45", "throughput": 2583.72, "total_tokens": 4016152}
1972
+ {"current_steps": 9805, "total_steps": 16520, "loss": 0.182, "lr": 2.1221716975786764e-05, "epoch": 11.87046004842615, "percentage": 59.35, "elapsed_time": "0:25:55", "remaining_time": "0:17:45", "throughput": 2583.85, "total_tokens": 4018168}
1973
+ {"current_steps": 9810, "total_steps": 16520, "loss": 0.3146, "lr": 2.119561010218131e-05, "epoch": 11.876513317191284, "percentage": 59.38, "elapsed_time": "0:25:55", "remaining_time": "0:17:44", "throughput": 2583.88, "total_tokens": 4020088}
1974
+ {"current_steps": 9815, "total_steps": 16520, "loss": 0.2005, "lr": 2.1169507474963422e-05, "epoch": 11.882566585956416, "percentage": 59.41, "elapsed_time": "0:25:56", "remaining_time": "0:17:43", "throughput": 2583.92, "total_tokens": 4022296}
1975
+ {"current_steps": 9820, "total_steps": 16520, "loss": 0.1137, "lr": 2.1143409123268342e-05, "epoch": 11.888619854721549, "percentage": 59.44, "elapsed_time": "0:25:57", "remaining_time": "0:17:42", "throughput": 2584.01, "total_tokens": 4024312}
1976
+ {"current_steps": 9825, "total_steps": 16520, "loss": 0.1717, "lr": 2.1117315076226558e-05, "epoch": 11.894673123486683, "percentage": 59.47, "elapsed_time": "0:25:58", "remaining_time": "0:17:41", "throughput": 2584.07, "total_tokens": 4026392}
1977
+ {"current_steps": 9830, "total_steps": 16520, "loss": 0.2401, "lr": 2.109122536296374e-05, "epoch": 11.900726392251816, "percentage": 59.5, "elapsed_time": "0:25:58", "remaining_time": "0:17:40", "throughput": 2584.13, "total_tokens": 4028344}
1978
+ {"current_steps": 9835, "total_steps": 16520, "loss": 0.1689, "lr": 2.1065140012600752e-05, "epoch": 11.90677966101695, "percentage": 59.53, "elapsed_time": "0:25:59", "remaining_time": "0:17:40", "throughput": 2584.16, "total_tokens": 4030328}
1979
+ {"current_steps": 9840, "total_steps": 16520, "loss": 0.1932, "lr": 2.103905905425354e-05, "epoch": 11.912832929782082, "percentage": 59.56, "elapsed_time": "0:26:00", "remaining_time": "0:17:39", "throughput": 2584.24, "total_tokens": 4032248}
1980
+ {"current_steps": 9845, "total_steps": 16520, "loss": 0.36, "lr": 2.1012982517033188e-05, "epoch": 11.918886198547215, "percentage": 59.59, "elapsed_time": "0:26:01", "remaining_time": "0:17:38", "throughput": 2584.31, "total_tokens": 4034392}
1981
+ {"current_steps": 9850, "total_steps": 16520, "loss": 0.2426, "lr": 2.0986910430045818e-05, "epoch": 11.924939467312349, "percentage": 59.62, "elapsed_time": "0:26:01", "remaining_time": "0:17:37", "throughput": 2584.36, "total_tokens": 4036440}
1982
+ {"current_steps": 9855, "total_steps": 16520, "loss": 0.1963, "lr": 2.096084282239262e-05, "epoch": 11.930992736077481, "percentage": 59.65, "elapsed_time": "0:26:02", "remaining_time": "0:17:36", "throughput": 2584.41, "total_tokens": 4038456}
1983
+ {"current_steps": 9860, "total_steps": 16520, "loss": 0.271, "lr": 2.0934779723169735e-05, "epoch": 11.937046004842616, "percentage": 59.69, "elapsed_time": "0:26:03", "remaining_time": "0:17:36", "throughput": 2584.44, "total_tokens": 4040536}
1984
+ {"current_steps": 9865, "total_steps": 16520, "loss": 0.1319, "lr": 2.0908721161468308e-05, "epoch": 11.943099273607748, "percentage": 59.72, "elapsed_time": "0:26:04", "remaining_time": "0:17:35", "throughput": 2584.5, "total_tokens": 4042488}
1985
+ {"current_steps": 9870, "total_steps": 16520, "loss": 0.1912, "lr": 2.088266716637441e-05, "epoch": 11.94915254237288, "percentage": 59.75, "elapsed_time": "0:26:04", "remaining_time": "0:17:34", "throughput": 2584.57, "total_tokens": 4044472}
1986
+ {"current_steps": 9875, "total_steps": 16520, "loss": 0.2292, "lr": 2.0856617766969027e-05, "epoch": 11.955205811138015, "percentage": 59.78, "elapsed_time": "0:26:05", "remaining_time": "0:17:33", "throughput": 2584.62, "total_tokens": 4046424}
1987
+ {"current_steps": 9880, "total_steps": 16520, "loss": 0.2208, "lr": 2.083057299232798e-05, "epoch": 11.961259079903147, "percentage": 59.81, "elapsed_time": "0:26:06", "remaining_time": "0:17:32", "throughput": 2584.69, "total_tokens": 4048504}
1988
+ {"current_steps": 9885, "total_steps": 16520, "loss": 0.3324, "lr": 2.080453287152196e-05, "epoch": 11.967312348668282, "percentage": 59.84, "elapsed_time": "0:26:07", "remaining_time": "0:17:31", "throughput": 2584.74, "total_tokens": 4050552}
1989
+ {"current_steps": 9890, "total_steps": 16520, "loss": 0.2415, "lr": 2.0778497433616463e-05, "epoch": 11.973365617433414, "percentage": 59.87, "elapsed_time": "0:26:07", "remaining_time": "0:17:31", "throughput": 2584.79, "total_tokens": 4052568}
1990
+ {"current_steps": 9895, "total_steps": 16520, "loss": 0.2597, "lr": 2.075246670767173e-05, "epoch": 11.979418886198546, "percentage": 59.9, "elapsed_time": "0:26:08", "remaining_time": "0:17:30", "throughput": 2584.87, "total_tokens": 4054584}
1991
+ {"current_steps": 9900, "total_steps": 16520, "loss": 0.115, "lr": 2.072644072274278e-05, "epoch": 11.98547215496368, "percentage": 59.93, "elapsed_time": "0:26:09", "remaining_time": "0:17:29", "throughput": 2584.89, "total_tokens": 4056568}
1992
+ {"current_steps": 9905, "total_steps": 16520, "loss": 0.3528, "lr": 2.0700419507879303e-05, "epoch": 11.991525423728813, "percentage": 59.96, "elapsed_time": "0:26:10", "remaining_time": "0:17:28", "throughput": 2584.9, "total_tokens": 4058552}
1993
+ {"current_steps": 9910, "total_steps": 16520, "loss": 0.1651, "lr": 2.067440309212571e-05, "epoch": 11.997578692493947, "percentage": 59.99, "elapsed_time": "0:26:10", "remaining_time": "0:17:27", "throughput": 2584.96, "total_tokens": 4060696}
1994
+ {"current_steps": 9912, "total_steps": 16520, "eval_loss": 0.24776192009449005, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:26:15", "remaining_time": "0:17:30", "throughput": 2577.11, "total_tokens": 4061216}
1995
+ {"current_steps": 9915, "total_steps": 16520, "loss": 0.2879, "lr": 2.0648391504521e-05, "epoch": 12.00363196125908, "percentage": 60.02, "elapsed_time": "0:26:17", "remaining_time": "0:17:30", "throughput": 2575.39, "total_tokens": 4062432}
1996
+ {"current_steps": 9920, "total_steps": 16520, "loss": 0.2703, "lr": 2.0622384774098834e-05, "epoch": 12.009685230024212, "percentage": 60.05, "elapsed_time": "0:26:18", "remaining_time": "0:17:30", "throughput": 2575.39, "total_tokens": 4064480}
1997
+ {"current_steps": 9925, "total_steps": 16520, "loss": 0.3752, "lr": 2.0596382929887412e-05, "epoch": 12.015738498789347, "percentage": 60.08, "elapsed_time": "0:26:18", "remaining_time": "0:17:29", "throughput": 2575.4, "total_tokens": 4066560}
1998
+ {"current_steps": 9930, "total_steps": 16520, "loss": 0.2079, "lr": 2.057038600090952e-05, "epoch": 12.021791767554479, "percentage": 60.11, "elapsed_time": "0:26:19", "remaining_time": "0:17:28", "throughput": 2575.41, "total_tokens": 4068384}
1999
+ {"current_steps": 9935, "total_steps": 16520, "loss": 0.2102, "lr": 2.0544394016182405e-05, "epoch": 12.027845036319613, "percentage": 60.14, "elapsed_time": "0:26:20", "remaining_time": "0:17:27", "throughput": 2575.37, "total_tokens": 4070464}