rbelanec commited on
Commit
47225ed
·
verified ·
1 Parent(s): 178f41a

Training in progress, step 36556

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +386 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c22fd255c6b57f6b4d6cf0e0558d759850872d60ac68b2c2b5dc991239e3ce0c
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f286bc20b0a2f3cac4ca38d29132f2df82c7414ef69ef0d9d8c41de048aa91b
3
  size 798032
trainer_log.jsonl CHANGED
@@ -6945,3 +6945,389 @@
6945
  {"current_steps": 34635, "total_steps": 38480, "loss": 0.0397, "lr": 1.5061335734376598e-06, "epoch": 18.00155925155925, "percentage": 90.01, "elapsed_time": "1:23:28", "remaining_time": "0:09:16", "throughput": 1318.26, "total_tokens": 6602488}
6946
  {"current_steps": 34640, "total_steps": 38480, "loss": 0.0782, "lr": 1.5022596895364615e-06, "epoch": 18.004158004158004, "percentage": 90.02, "elapsed_time": "1:23:29", "remaining_time": "0:09:15", "throughput": 1318.22, "total_tokens": 6603416}
6947
  {"current_steps": 34645, "total_steps": 38480, "loss": 0.173, "lr": 1.4983906396803972e-06, "epoch": 18.006756756756758, "percentage": 90.03, "elapsed_time": "1:23:30", "remaining_time": "0:09:14", "throughput": 1318.22, "total_tokens": 6604376}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6945
  {"current_steps": 34635, "total_steps": 38480, "loss": 0.0397, "lr": 1.5061335734376598e-06, "epoch": 18.00155925155925, "percentage": 90.01, "elapsed_time": "1:23:28", "remaining_time": "0:09:16", "throughput": 1318.26, "total_tokens": 6602488}
6946
  {"current_steps": 34640, "total_steps": 38480, "loss": 0.0782, "lr": 1.5022596895364615e-06, "epoch": 18.004158004158004, "percentage": 90.02, "elapsed_time": "1:23:29", "remaining_time": "0:09:15", "throughput": 1318.22, "total_tokens": 6603416}
6947
  {"current_steps": 34645, "total_steps": 38480, "loss": 0.173, "lr": 1.4983906396803972e-06, "epoch": 18.006756756756758, "percentage": 90.03, "elapsed_time": "1:23:30", "remaining_time": "0:09:14", "throughput": 1318.22, "total_tokens": 6604376}
6948
+ {"current_steps": 34650, "total_steps": 38480, "loss": 0.1012, "lr": 1.4945264246654212e-06, "epoch": 18.009355509355508, "percentage": 90.05, "elapsed_time": "1:23:30", "remaining_time": "0:09:13", "throughput": 1318.23, "total_tokens": 6605368}
6949
+ {"current_steps": 34655, "total_steps": 38480, "loss": 0.0162, "lr": 1.490667045286495e-06, "epoch": 18.011954261954262, "percentage": 90.06, "elapsed_time": "1:23:31", "remaining_time": "0:09:13", "throughput": 1318.23, "total_tokens": 6606296}
6950
+ {"current_steps": 34660, "total_steps": 38480, "loss": 0.1872, "lr": 1.486812502337581e-06, "epoch": 18.014553014553016, "percentage": 90.07, "elapsed_time": "1:23:32", "remaining_time": "0:09:12", "throughput": 1318.22, "total_tokens": 6607192}
6951
+ {"current_steps": 34665, "total_steps": 38480, "loss": 0.1185, "lr": 1.482962796611656e-06, "epoch": 18.017151767151766, "percentage": 90.09, "elapsed_time": "1:23:32", "remaining_time": "0:09:11", "throughput": 1318.23, "total_tokens": 6608152}
6952
+ {"current_steps": 34670, "total_steps": 38480, "loss": 0.4231, "lr": 1.4791179289006973e-06, "epoch": 18.01975051975052, "percentage": 90.1, "elapsed_time": "1:23:33", "remaining_time": "0:09:10", "throughput": 1318.23, "total_tokens": 6609112}
6953
+ {"current_steps": 34675, "total_steps": 38480, "loss": 0.1364, "lr": 1.475277899995678e-06, "epoch": 18.022349272349274, "percentage": 90.11, "elapsed_time": "1:23:34", "remaining_time": "0:09:10", "throughput": 1318.22, "total_tokens": 6609976}
6954
+ {"current_steps": 34680, "total_steps": 38480, "loss": 0.083, "lr": 1.4714427106865909e-06, "epoch": 18.024948024948024, "percentage": 90.12, "elapsed_time": "1:23:35", "remaining_time": "0:09:09", "throughput": 1318.21, "total_tokens": 6610840}
6955
+ {"current_steps": 34685, "total_steps": 38480, "loss": 0.1065, "lr": 1.4676123617624215e-06, "epoch": 18.027546777546778, "percentage": 90.14, "elapsed_time": "1:23:35", "remaining_time": "0:09:08", "throughput": 1318.2, "total_tokens": 6611736}
6956
+ {"current_steps": 34690, "total_steps": 38480, "loss": 0.0821, "lr": 1.463786854011165e-06, "epoch": 18.03014553014553, "percentage": 90.15, "elapsed_time": "1:23:36", "remaining_time": "0:09:08", "throughput": 1318.2, "total_tokens": 6612664}
6957
+ {"current_steps": 34695, "total_steps": 38480, "loss": 0.0721, "lr": 1.459966188219819e-06, "epoch": 18.032744282744282, "percentage": 90.16, "elapsed_time": "1:23:37", "remaining_time": "0:09:07", "throughput": 1318.2, "total_tokens": 6613560}
6958
+ {"current_steps": 34700, "total_steps": 38480, "loss": 0.1654, "lr": 1.4561503651743886e-06, "epoch": 18.035343035343036, "percentage": 90.18, "elapsed_time": "1:23:37", "remaining_time": "0:09:06", "throughput": 1318.2, "total_tokens": 6614520}
6959
+ {"current_steps": 34705, "total_steps": 38480, "loss": 0.1337, "lr": 1.452339385659876e-06, "epoch": 18.037941787941786, "percentage": 90.19, "elapsed_time": "1:23:38", "remaining_time": "0:09:05", "throughput": 1318.22, "total_tokens": 6615512}
6960
+ {"current_steps": 34710, "total_steps": 38480, "loss": 0.1031, "lr": 1.4485332504602905e-06, "epoch": 18.04054054054054, "percentage": 90.2, "elapsed_time": "1:23:39", "remaining_time": "0:09:05", "throughput": 1318.22, "total_tokens": 6616440}
6961
+ {"current_steps": 34715, "total_steps": 38480, "loss": 0.1415, "lr": 1.4447319603586473e-06, "epoch": 18.043139293139294, "percentage": 90.22, "elapsed_time": "1:23:39", "remaining_time": "0:09:04", "throughput": 1318.21, "total_tokens": 6617336}
6962
+ {"current_steps": 34720, "total_steps": 38480, "loss": 0.192, "lr": 1.4409355161369626e-06, "epoch": 18.045738045738045, "percentage": 90.23, "elapsed_time": "1:23:40", "remaining_time": "0:09:03", "throughput": 1318.21, "total_tokens": 6618264}
6963
+ {"current_steps": 34725, "total_steps": 38480, "loss": 0.1922, "lr": 1.437143918576253e-06, "epoch": 18.0483367983368, "percentage": 90.24, "elapsed_time": "1:23:41", "remaining_time": "0:09:02", "throughput": 1318.22, "total_tokens": 6619224}
6964
+ {"current_steps": 34730, "total_steps": 38480, "loss": 0.1325, "lr": 1.4333571684565395e-06, "epoch": 18.050935550935552, "percentage": 90.25, "elapsed_time": "1:23:42", "remaining_time": "0:09:02", "throughput": 1318.22, "total_tokens": 6620152}
6965
+ {"current_steps": 34735, "total_steps": 38480, "loss": 0.1229, "lr": 1.4295752665568567e-06, "epoch": 18.053534303534303, "percentage": 90.27, "elapsed_time": "1:23:42", "remaining_time": "0:09:01", "throughput": 1318.23, "total_tokens": 6621144}
6966
+ {"current_steps": 34740, "total_steps": 38480, "loss": 0.1157, "lr": 1.4257982136552189e-06, "epoch": 18.056133056133056, "percentage": 90.28, "elapsed_time": "1:23:43", "remaining_time": "0:09:00", "throughput": 1318.25, "total_tokens": 6622136}
6967
+ {"current_steps": 34745, "total_steps": 38480, "loss": 0.1025, "lr": 1.422026010528657e-06, "epoch": 18.05873180873181, "percentage": 90.29, "elapsed_time": "1:23:44", "remaining_time": "0:09:00", "throughput": 1318.26, "total_tokens": 6623128}
6968
+ {"current_steps": 34750, "total_steps": 38480, "loss": 0.1885, "lr": 1.4182586579532114e-06, "epoch": 18.06133056133056, "percentage": 90.31, "elapsed_time": "1:23:44", "remaining_time": "0:08:59", "throughput": 1318.26, "total_tokens": 6624056}
6969
+ {"current_steps": 34755, "total_steps": 38480, "loss": 0.1268, "lr": 1.4144961567039121e-06, "epoch": 18.063929313929314, "percentage": 90.32, "elapsed_time": "1:23:45", "remaining_time": "0:08:58", "throughput": 1318.27, "total_tokens": 6625048}
6970
+ {"current_steps": 34760, "total_steps": 38480, "loss": 0.106, "lr": 1.4107385075547957e-06, "epoch": 18.066528066528065, "percentage": 90.33, "elapsed_time": "1:23:46", "remaining_time": "0:08:57", "throughput": 1318.29, "total_tokens": 6626072}
6971
+ {"current_steps": 34765, "total_steps": 38480, "loss": 0.076, "lr": 1.4069857112788936e-06, "epoch": 18.06912681912682, "percentage": 90.35, "elapsed_time": "1:23:46", "remaining_time": "0:08:57", "throughput": 1318.31, "total_tokens": 6627064}
6972
+ {"current_steps": 34770, "total_steps": 38480, "loss": 0.1868, "lr": 1.4032377686482578e-06, "epoch": 18.071725571725572, "percentage": 90.36, "elapsed_time": "1:23:47", "remaining_time": "0:08:56", "throughput": 1318.3, "total_tokens": 6627960}
6973
+ {"current_steps": 34775, "total_steps": 38480, "loss": 0.0718, "lr": 1.3994946804339243e-06, "epoch": 18.074324324324323, "percentage": 90.37, "elapsed_time": "1:23:48", "remaining_time": "0:08:55", "throughput": 1318.32, "total_tokens": 6628984}
6974
+ {"current_steps": 34780, "total_steps": 38480, "loss": 0.1856, "lr": 1.3957564474059326e-06, "epoch": 18.076923076923077, "percentage": 90.38, "elapsed_time": "1:23:49", "remaining_time": "0:08:55", "throughput": 1318.34, "total_tokens": 6630008}
6975
+ {"current_steps": 34785, "total_steps": 38480, "loss": 0.1303, "lr": 1.3920230703333231e-06, "epoch": 18.07952182952183, "percentage": 90.4, "elapsed_time": "1:23:49", "remaining_time": "0:08:54", "throughput": 1318.34, "total_tokens": 6630904}
6976
+ {"current_steps": 34790, "total_steps": 38480, "loss": 0.1119, "lr": 1.388294549984151e-06, "epoch": 18.08212058212058, "percentage": 90.41, "elapsed_time": "1:23:50", "remaining_time": "0:08:53", "throughput": 1318.27, "total_tokens": 6631800}
6977
+ {"current_steps": 34795, "total_steps": 38480, "loss": 0.0858, "lr": 1.3845708871254553e-06, "epoch": 18.084719334719335, "percentage": 90.42, "elapsed_time": "1:23:51", "remaining_time": "0:08:52", "throughput": 1318.27, "total_tokens": 6632760}
6978
+ {"current_steps": 34800, "total_steps": 38480, "loss": 0.0958, "lr": 1.3808520825232817e-06, "epoch": 18.08731808731809, "percentage": 90.44, "elapsed_time": "1:23:52", "remaining_time": "0:08:52", "throughput": 1318.29, "total_tokens": 6633752}
6979
+ {"current_steps": 34805, "total_steps": 38480, "loss": 0.0425, "lr": 1.3771381369426822e-06, "epoch": 18.08991683991684, "percentage": 90.45, "elapsed_time": "1:23:52", "remaining_time": "0:08:51", "throughput": 1318.29, "total_tokens": 6634680}
6980
+ {"current_steps": 34810, "total_steps": 38480, "loss": 0.123, "lr": 1.3734290511476955e-06, "epoch": 18.092515592515593, "percentage": 90.46, "elapsed_time": "1:23:53", "remaining_time": "0:08:50", "throughput": 1318.28, "total_tokens": 6635576}
6981
+ {"current_steps": 34815, "total_steps": 38480, "loss": 0.2086, "lr": 1.3697248259013779e-06, "epoch": 18.095114345114347, "percentage": 90.48, "elapsed_time": "1:23:54", "remaining_time": "0:08:49", "throughput": 1318.29, "total_tokens": 6636504}
6982
+ {"current_steps": 34820, "total_steps": 38480, "loss": 0.1814, "lr": 1.3660254619657642e-06, "epoch": 18.097713097713097, "percentage": 90.49, "elapsed_time": "1:23:54", "remaining_time": "0:08:49", "throughput": 1318.29, "total_tokens": 6637432}
6983
+ {"current_steps": 34825, "total_steps": 38480, "loss": 0.0649, "lr": 1.362330960101918e-06, "epoch": 18.10031185031185, "percentage": 90.5, "elapsed_time": "1:23:55", "remaining_time": "0:08:48", "throughput": 1318.3, "total_tokens": 6638424}
6984
+ {"current_steps": 34830, "total_steps": 38480, "loss": 0.1416, "lr": 1.3586413210698756e-06, "epoch": 18.102910602910605, "percentage": 90.51, "elapsed_time": "1:23:56", "remaining_time": "0:08:47", "throughput": 1318.3, "total_tokens": 6639352}
6985
+ {"current_steps": 34835, "total_steps": 38480, "loss": 0.1723, "lr": 1.3549565456286829e-06, "epoch": 18.105509355509355, "percentage": 90.53, "elapsed_time": "1:23:56", "remaining_time": "0:08:47", "throughput": 1318.33, "total_tokens": 6640408}
6986
+ {"current_steps": 34840, "total_steps": 38480, "loss": 0.0907, "lr": 1.3512766345363942e-06, "epoch": 18.10810810810811, "percentage": 90.54, "elapsed_time": "1:23:57", "remaining_time": "0:08:46", "throughput": 1318.33, "total_tokens": 6641336}
6987
+ {"current_steps": 34845, "total_steps": 38480, "loss": 0.0486, "lr": 1.3476015885500487e-06, "epoch": 18.11070686070686, "percentage": 90.55, "elapsed_time": "1:23:58", "remaining_time": "0:08:45", "throughput": 1318.33, "total_tokens": 6642232}
6988
+ {"current_steps": 34850, "total_steps": 38480, "loss": 0.2875, "lr": 1.3439314084256916e-06, "epoch": 18.113305613305613, "percentage": 90.57, "elapsed_time": "1:23:59", "remaining_time": "0:08:44", "throughput": 1318.32, "total_tokens": 6643128}
6989
+ {"current_steps": 34855, "total_steps": 38480, "loss": 0.1901, "lr": 1.340266094918366e-06, "epoch": 18.115904365904367, "percentage": 90.58, "elapsed_time": "1:23:59", "remaining_time": "0:08:44", "throughput": 1318.31, "total_tokens": 6643992}
6990
+ {"current_steps": 34860, "total_steps": 38480, "loss": 0.066, "lr": 1.3366056487821189e-06, "epoch": 18.118503118503117, "percentage": 90.59, "elapsed_time": "1:24:00", "remaining_time": "0:08:43", "throughput": 1318.31, "total_tokens": 6644952}
6991
+ {"current_steps": 34865, "total_steps": 38480, "loss": 0.114, "lr": 1.3329500707699893e-06, "epoch": 18.12110187110187, "percentage": 90.61, "elapsed_time": "1:24:01", "remaining_time": "0:08:42", "throughput": 1318.32, "total_tokens": 6645912}
6992
+ {"current_steps": 34870, "total_steps": 38480, "loss": 0.1557, "lr": 1.3292993616340093e-06, "epoch": 18.123700623700625, "percentage": 90.62, "elapsed_time": "1:24:01", "remaining_time": "0:08:41", "throughput": 1318.33, "total_tokens": 6646872}
6993
+ {"current_steps": 34875, "total_steps": 38480, "loss": 0.0404, "lr": 1.3256535221252254e-06, "epoch": 18.126299376299375, "percentage": 90.63, "elapsed_time": "1:24:02", "remaining_time": "0:08:41", "throughput": 1318.33, "total_tokens": 6647832}
6994
+ {"current_steps": 34880, "total_steps": 38480, "loss": 0.0641, "lr": 1.3220125529936734e-06, "epoch": 18.12889812889813, "percentage": 90.64, "elapsed_time": "1:24:03", "remaining_time": "0:08:40", "throughput": 1318.34, "total_tokens": 6648792}
6995
+ {"current_steps": 34885, "total_steps": 38480, "loss": 0.1289, "lr": 1.318376454988382e-06, "epoch": 18.131496881496883, "percentage": 90.66, "elapsed_time": "1:24:04", "remaining_time": "0:08:39", "throughput": 1318.36, "total_tokens": 6649816}
6996
+ {"current_steps": 34890, "total_steps": 38480, "loss": 0.0982, "lr": 1.3147452288573835e-06, "epoch": 18.134095634095633, "percentage": 90.67, "elapsed_time": "1:24:04", "remaining_time": "0:08:39", "throughput": 1318.36, "total_tokens": 6650744}
6997
+ {"current_steps": 34895, "total_steps": 38480, "loss": 0.0859, "lr": 1.3111188753477166e-06, "epoch": 18.136694386694387, "percentage": 90.68, "elapsed_time": "1:24:05", "remaining_time": "0:08:38", "throughput": 1318.39, "total_tokens": 6651800}
6998
+ {"current_steps": 34900, "total_steps": 38480, "loss": 0.0641, "lr": 1.3074973952053982e-06, "epoch": 18.13929313929314, "percentage": 90.7, "elapsed_time": "1:24:06", "remaining_time": "0:08:37", "throughput": 1318.4, "total_tokens": 6652760}
6999
+ {"current_steps": 34905, "total_steps": 38480, "loss": 0.0473, "lr": 1.3038807891754601e-06, "epoch": 18.14189189189189, "percentage": 90.71, "elapsed_time": "1:24:06", "remaining_time": "0:08:36", "throughput": 1318.4, "total_tokens": 6653688}
7000
+ {"current_steps": 34910, "total_steps": 38480, "loss": 0.0663, "lr": 1.3002690580019184e-06, "epoch": 18.144490644490645, "percentage": 90.72, "elapsed_time": "1:24:07", "remaining_time": "0:08:36", "throughput": 1318.4, "total_tokens": 6654616}
7001
+ {"current_steps": 34915, "total_steps": 38480, "loss": 0.0397, "lr": 1.2966622024277952e-06, "epoch": 18.147089397089395, "percentage": 90.74, "elapsed_time": "1:24:08", "remaining_time": "0:08:35", "throughput": 1318.42, "total_tokens": 6655640}
7002
+ {"current_steps": 34920, "total_steps": 38480, "loss": 0.0992, "lr": 1.293060223195111e-06, "epoch": 18.14968814968815, "percentage": 90.75, "elapsed_time": "1:24:08", "remaining_time": "0:08:34", "throughput": 1318.43, "total_tokens": 6656632}
7003
+ {"current_steps": 34925, "total_steps": 38480, "loss": 0.1294, "lr": 1.2894631210448733e-06, "epoch": 18.152286902286903, "percentage": 90.76, "elapsed_time": "1:24:09", "remaining_time": "0:08:33", "throughput": 1318.43, "total_tokens": 6657528}
7004
+ {"current_steps": 34930, "total_steps": 38480, "loss": 0.1521, "lr": 1.2858708967170868e-06, "epoch": 18.154885654885653, "percentage": 90.77, "elapsed_time": "1:24:10", "remaining_time": "0:08:33", "throughput": 1318.44, "total_tokens": 6658520}
7005
+ {"current_steps": 34935, "total_steps": 38480, "loss": 0.1553, "lr": 1.2822835509507746e-06, "epoch": 18.157484407484407, "percentage": 90.79, "elapsed_time": "1:24:10", "remaining_time": "0:08:32", "throughput": 1318.46, "total_tokens": 6659512}
7006
+ {"current_steps": 34940, "total_steps": 38480, "loss": 0.0949, "lr": 1.2787010844839214e-06, "epoch": 18.16008316008316, "percentage": 90.8, "elapsed_time": "1:24:11", "remaining_time": "0:08:31", "throughput": 1318.47, "total_tokens": 6660536}
7007
+ {"current_steps": 34945, "total_steps": 38480, "loss": 0.1744, "lr": 1.275123498053532e-06, "epoch": 18.16268191268191, "percentage": 90.81, "elapsed_time": "1:24:12", "remaining_time": "0:08:31", "throughput": 1318.49, "total_tokens": 6661528}
7008
+ {"current_steps": 34950, "total_steps": 38480, "loss": 0.0606, "lr": 1.2715507923956038e-06, "epoch": 18.165280665280665, "percentage": 90.83, "elapsed_time": "1:24:13", "remaining_time": "0:08:30", "throughput": 1318.5, "total_tokens": 6662488}
7009
+ {"current_steps": 34955, "total_steps": 38480, "loss": 0.2305, "lr": 1.2679829682451295e-06, "epoch": 18.16787941787942, "percentage": 90.84, "elapsed_time": "1:24:13", "remaining_time": "0:08:29", "throughput": 1318.49, "total_tokens": 6663384}
7010
+ {"current_steps": 34960, "total_steps": 38480, "loss": 0.1189, "lr": 1.2644200263360857e-06, "epoch": 18.17047817047817, "percentage": 90.85, "elapsed_time": "1:24:14", "remaining_time": "0:08:28", "throughput": 1318.5, "total_tokens": 6664344}
7011
+ {"current_steps": 34965, "total_steps": 38480, "loss": 0.0959, "lr": 1.2608619674014638e-06, "epoch": 18.173076923076923, "percentage": 90.87, "elapsed_time": "1:24:15", "remaining_time": "0:08:28", "throughput": 1318.5, "total_tokens": 6665304}
7012
+ {"current_steps": 34970, "total_steps": 38480, "loss": 0.076, "lr": 1.2573087921732368e-06, "epoch": 18.175675675675677, "percentage": 90.88, "elapsed_time": "1:24:15", "remaining_time": "0:08:27", "throughput": 1318.52, "total_tokens": 6666296}
7013
+ {"current_steps": 34975, "total_steps": 38480, "loss": 0.0858, "lr": 1.253760501382381e-06, "epoch": 18.178274428274428, "percentage": 90.89, "elapsed_time": "1:24:16", "remaining_time": "0:08:26", "throughput": 1318.52, "total_tokens": 6667224}
7014
+ {"current_steps": 34980, "total_steps": 38480, "loss": 0.1056, "lr": 1.25021709575886e-06, "epoch": 18.18087318087318, "percentage": 90.9, "elapsed_time": "1:24:17", "remaining_time": "0:08:26", "throughput": 1318.53, "total_tokens": 6668184}
7015
+ {"current_steps": 34985, "total_steps": 38480, "loss": 0.0825, "lr": 1.2466785760316401e-06, "epoch": 18.183471933471935, "percentage": 90.92, "elapsed_time": "1:24:17", "remaining_time": "0:08:25", "throughput": 1318.53, "total_tokens": 6669144}
7016
+ {"current_steps": 34990, "total_steps": 38480, "loss": 0.1062, "lr": 1.2431449429286784e-06, "epoch": 18.186070686070686, "percentage": 90.93, "elapsed_time": "1:24:18", "remaining_time": "0:08:24", "throughput": 1318.55, "total_tokens": 6670168}
7017
+ {"current_steps": 34995, "total_steps": 38480, "loss": 0.1176, "lr": 1.2396161971769292e-06, "epoch": 18.18866943866944, "percentage": 90.94, "elapsed_time": "1:24:19", "remaining_time": "0:08:23", "throughput": 1318.57, "total_tokens": 6671160}
7018
+ {"current_steps": 35000, "total_steps": 38480, "loss": 0.1404, "lr": 1.2360923395023343e-06, "epoch": 18.19126819126819, "percentage": 90.96, "elapsed_time": "1:24:20", "remaining_time": "0:08:23", "throughput": 1318.58, "total_tokens": 6672120}
7019
+ {"current_steps": 35005, "total_steps": 38480, "loss": 0.3072, "lr": 1.2325733706298414e-06, "epoch": 18.193866943866944, "percentage": 90.97, "elapsed_time": "1:24:20", "remaining_time": "0:08:22", "throughput": 1318.58, "total_tokens": 6673080}
7020
+ {"current_steps": 35010, "total_steps": 38480, "loss": 0.2858, "lr": 1.2290592912833798e-06, "epoch": 18.196465696465697, "percentage": 90.98, "elapsed_time": "1:24:21", "remaining_time": "0:08:21", "throughput": 1318.59, "total_tokens": 6674008}
7021
+ {"current_steps": 35015, "total_steps": 38480, "loss": 0.2215, "lr": 1.2255501021858822e-06, "epoch": 18.199064449064448, "percentage": 91.0, "elapsed_time": "1:24:22", "remaining_time": "0:08:20", "throughput": 1318.59, "total_tokens": 6674968}
7022
+ {"current_steps": 35020, "total_steps": 38480, "loss": 0.1678, "lr": 1.2220458040592769e-06, "epoch": 18.2016632016632, "percentage": 91.01, "elapsed_time": "1:24:22", "remaining_time": "0:08:20", "throughput": 1318.59, "total_tokens": 6675896}
7023
+ {"current_steps": 35025, "total_steps": 38480, "loss": 0.1537, "lr": 1.218546397624476e-06, "epoch": 18.204261954261955, "percentage": 91.02, "elapsed_time": "1:24:23", "remaining_time": "0:08:19", "throughput": 1318.6, "total_tokens": 6676856}
7024
+ {"current_steps": 35030, "total_steps": 38480, "loss": 0.2026, "lr": 1.2150518836013924e-06, "epoch": 18.206860706860706, "percentage": 91.03, "elapsed_time": "1:24:24", "remaining_time": "0:08:18", "throughput": 1318.59, "total_tokens": 6677720}
7025
+ {"current_steps": 35035, "total_steps": 38480, "loss": 0.3126, "lr": 1.2115622627089262e-06, "epoch": 18.20945945945946, "percentage": 91.05, "elapsed_time": "1:24:24", "remaining_time": "0:08:18", "throughput": 1318.6, "total_tokens": 6678680}
7026
+ {"current_steps": 35040, "total_steps": 38480, "loss": 0.1744, "lr": 1.2080775356649866e-06, "epoch": 18.212058212058214, "percentage": 91.06, "elapsed_time": "1:24:25", "remaining_time": "0:08:17", "throughput": 1318.6, "total_tokens": 6679640}
7027
+ {"current_steps": 35045, "total_steps": 38480, "loss": 0.139, "lr": 1.2045977031864581e-06, "epoch": 18.214656964656964, "percentage": 91.07, "elapsed_time": "1:24:26", "remaining_time": "0:08:16", "throughput": 1318.6, "total_tokens": 6680568}
7028
+ {"current_steps": 35050, "total_steps": 38480, "loss": 0.0935, "lr": 1.2011227659892239e-06, "epoch": 18.217255717255718, "percentage": 91.09, "elapsed_time": "1:24:27", "remaining_time": "0:08:15", "throughput": 1318.59, "total_tokens": 6681432}
7029
+ {"current_steps": 35055, "total_steps": 38480, "loss": 0.1747, "lr": 1.1976527247881618e-06, "epoch": 18.21985446985447, "percentage": 91.1, "elapsed_time": "1:24:27", "remaining_time": "0:08:15", "throughput": 1318.59, "total_tokens": 6682328}
7030
+ {"current_steps": 35060, "total_steps": 38480, "loss": 0.053, "lr": 1.194187580297146e-06, "epoch": 18.222453222453222, "percentage": 91.11, "elapsed_time": "1:24:28", "remaining_time": "0:08:14", "throughput": 1318.58, "total_tokens": 6683192}
7031
+ {"current_steps": 35065, "total_steps": 38480, "loss": 0.1269, "lr": 1.1907273332290413e-06, "epoch": 18.225051975051976, "percentage": 91.13, "elapsed_time": "1:24:29", "remaining_time": "0:08:13", "throughput": 1318.59, "total_tokens": 6684152}
7032
+ {"current_steps": 35070, "total_steps": 38480, "loss": 0.1277, "lr": 1.1872719842956904e-06, "epoch": 18.227650727650726, "percentage": 91.14, "elapsed_time": "1:24:29", "remaining_time": "0:08:12", "throughput": 1318.58, "total_tokens": 6685048}
7033
+ {"current_steps": 35075, "total_steps": 38480, "loss": 0.2263, "lr": 1.1838215342079516e-06, "epoch": 18.23024948024948, "percentage": 91.15, "elapsed_time": "1:24:30", "remaining_time": "0:08:12", "throughput": 1318.59, "total_tokens": 6686008}
7034
+ {"current_steps": 35080, "total_steps": 38480, "loss": 0.0564, "lr": 1.1803759836756633e-06, "epoch": 18.232848232848234, "percentage": 91.16, "elapsed_time": "1:24:31", "remaining_time": "0:08:11", "throughput": 1318.59, "total_tokens": 6686936}
7035
+ {"current_steps": 35085, "total_steps": 38480, "loss": 0.0649, "lr": 1.1769353334076527e-06, "epoch": 18.235446985446984, "percentage": 91.18, "elapsed_time": "1:24:31", "remaining_time": "0:08:10", "throughput": 1318.6, "total_tokens": 6687928}
7036
+ {"current_steps": 35090, "total_steps": 38480, "loss": 0.2217, "lr": 1.1734995841117508e-06, "epoch": 18.238045738045738, "percentage": 91.19, "elapsed_time": "1:24:32", "remaining_time": "0:08:10", "throughput": 1318.61, "total_tokens": 6688888}
7037
+ {"current_steps": 35095, "total_steps": 38480, "loss": 0.058, "lr": 1.170068736494767e-06, "epoch": 18.240644490644492, "percentage": 91.2, "elapsed_time": "1:24:33", "remaining_time": "0:08:09", "throughput": 1318.63, "total_tokens": 6689880}
7038
+ {"current_steps": 35100, "total_steps": 38480, "loss": 0.069, "lr": 1.1666427912625121e-06, "epoch": 18.243243243243242, "percentage": 91.22, "elapsed_time": "1:24:34", "remaining_time": "0:08:08", "throughput": 1318.63, "total_tokens": 6690808}
7039
+ {"current_steps": 35105, "total_steps": 38480, "loss": 0.1601, "lr": 1.1632217491197806e-06, "epoch": 18.245841995841996, "percentage": 91.23, "elapsed_time": "1:24:34", "remaining_time": "0:08:07", "throughput": 1318.63, "total_tokens": 6691736}
7040
+ {"current_steps": 35110, "total_steps": 38480, "loss": 0.22, "lr": 1.1598056107703675e-06, "epoch": 18.24844074844075, "percentage": 91.24, "elapsed_time": "1:24:35", "remaining_time": "0:08:07", "throughput": 1318.64, "total_tokens": 6692728}
7041
+ {"current_steps": 35115, "total_steps": 38480, "loss": 0.1626, "lr": 1.1563943769170527e-06, "epoch": 18.2510395010395, "percentage": 91.26, "elapsed_time": "1:24:36", "remaining_time": "0:08:06", "throughput": 1318.65, "total_tokens": 6693688}
7042
+ {"current_steps": 35120, "total_steps": 38480, "loss": 0.1977, "lr": 1.1529880482616051e-06, "epoch": 18.253638253638254, "percentage": 91.27, "elapsed_time": "1:24:36", "remaining_time": "0:08:05", "throughput": 1318.64, "total_tokens": 6694584}
7043
+ {"current_steps": 35125, "total_steps": 38480, "loss": 0.1621, "lr": 1.1495866255047866e-06, "epoch": 18.256237006237008, "percentage": 91.28, "elapsed_time": "1:24:37", "remaining_time": "0:08:04", "throughput": 1318.66, "total_tokens": 6695576}
7044
+ {"current_steps": 35130, "total_steps": 38480, "loss": 0.1211, "lr": 1.1461901093463595e-06, "epoch": 18.258835758835758, "percentage": 91.29, "elapsed_time": "1:24:38", "remaining_time": "0:08:04", "throughput": 1318.66, "total_tokens": 6696504}
7045
+ {"current_steps": 35135, "total_steps": 38480, "loss": 0.1278, "lr": 1.1427985004850623e-06, "epoch": 18.261434511434512, "percentage": 91.31, "elapsed_time": "1:24:38", "remaining_time": "0:08:03", "throughput": 1318.67, "total_tokens": 6697496}
7046
+ {"current_steps": 35140, "total_steps": 38480, "loss": 0.0934, "lr": 1.1394117996186226e-06, "epoch": 18.264033264033262, "percentage": 91.32, "elapsed_time": "1:24:39", "remaining_time": "0:08:02", "throughput": 1318.7, "total_tokens": 6698552}
7047
+ {"current_steps": 35145, "total_steps": 38480, "loss": 0.1021, "lr": 1.1360300074437808e-06, "epoch": 18.266632016632016, "percentage": 91.33, "elapsed_time": "1:24:40", "remaining_time": "0:08:02", "throughput": 1318.69, "total_tokens": 6699448}
7048
+ {"current_steps": 35150, "total_steps": 38480, "loss": 0.1761, "lr": 1.132653124656241e-06, "epoch": 18.26923076923077, "percentage": 91.35, "elapsed_time": "1:24:41", "remaining_time": "0:08:01", "throughput": 1318.69, "total_tokens": 6700376}
7049
+ {"current_steps": 35155, "total_steps": 38480, "loss": 0.1576, "lr": 1.1292811519507117e-06, "epoch": 18.27182952182952, "percentage": 91.36, "elapsed_time": "1:24:41", "remaining_time": "0:08:00", "throughput": 1318.69, "total_tokens": 6701272}
7050
+ {"current_steps": 35160, "total_steps": 38480, "loss": 0.0579, "lr": 1.1259140900208853e-06, "epoch": 18.274428274428274, "percentage": 91.37, "elapsed_time": "1:24:42", "remaining_time": "0:07:59", "throughput": 1318.69, "total_tokens": 6702232}
7051
+ {"current_steps": 35165, "total_steps": 38480, "loss": 0.1029, "lr": 1.122551939559452e-06, "epoch": 18.277027027027028, "percentage": 91.39, "elapsed_time": "1:24:43", "remaining_time": "0:07:59", "throughput": 1318.71, "total_tokens": 6703224}
7052
+ {"current_steps": 35170, "total_steps": 38480, "loss": 0.1056, "lr": 1.119194701258086e-06, "epoch": 18.27962577962578, "percentage": 91.4, "elapsed_time": "1:24:43", "remaining_time": "0:07:58", "throughput": 1318.71, "total_tokens": 6704184}
7053
+ {"current_steps": 35175, "total_steps": 38480, "loss": 0.0537, "lr": 1.1158423758074465e-06, "epoch": 18.282224532224532, "percentage": 91.41, "elapsed_time": "1:24:44", "remaining_time": "0:07:57", "throughput": 1318.72, "total_tokens": 6705112}
7054
+ {"current_steps": 35180, "total_steps": 38480, "loss": 0.1486, "lr": 1.112494963897187e-06, "epoch": 18.284823284823286, "percentage": 91.42, "elapsed_time": "1:24:45", "remaining_time": "0:07:57", "throughput": 1318.74, "total_tokens": 6706136}
7055
+ {"current_steps": 35185, "total_steps": 38480, "loss": 0.1552, "lr": 1.109152466215954e-06, "epoch": 18.287422037422036, "percentage": 91.44, "elapsed_time": "1:24:45", "remaining_time": "0:07:56", "throughput": 1318.75, "total_tokens": 6707128}
7056
+ {"current_steps": 35190, "total_steps": 38480, "loss": 0.1324, "lr": 1.105814883451378e-06, "epoch": 18.29002079002079, "percentage": 91.45, "elapsed_time": "1:24:46", "remaining_time": "0:07:55", "throughput": 1318.74, "total_tokens": 6707992}
7057
+ {"current_steps": 35195, "total_steps": 38480, "loss": 0.1184, "lr": 1.1024822162900767e-06, "epoch": 18.292619542619544, "percentage": 91.46, "elapsed_time": "1:24:47", "remaining_time": "0:07:54", "throughput": 1318.74, "total_tokens": 6708952}
7058
+ {"current_steps": 35200, "total_steps": 38480, "loss": 0.0348, "lr": 1.0991544654176623e-06, "epoch": 18.295218295218294, "percentage": 91.48, "elapsed_time": "1:24:48", "remaining_time": "0:07:54", "throughput": 1318.76, "total_tokens": 6709944}
7059
+ {"current_steps": 35205, "total_steps": 38480, "loss": 0.078, "lr": 1.095831631518729e-06, "epoch": 18.29781704781705, "percentage": 91.49, "elapsed_time": "1:24:48", "remaining_time": "0:07:53", "throughput": 1318.76, "total_tokens": 6710872}
7060
+ {"current_steps": 35210, "total_steps": 38480, "loss": 0.1355, "lr": 1.0925137152768633e-06, "epoch": 18.3004158004158, "percentage": 91.5, "elapsed_time": "1:24:49", "remaining_time": "0:07:52", "throughput": 1318.76, "total_tokens": 6711800}
7061
+ {"current_steps": 35215, "total_steps": 38480, "loss": 0.1522, "lr": 1.0892007173746417e-06, "epoch": 18.303014553014552, "percentage": 91.52, "elapsed_time": "1:24:50", "remaining_time": "0:07:51", "throughput": 1318.77, "total_tokens": 6712792}
7062
+ {"current_steps": 35220, "total_steps": 38480, "loss": 0.0954, "lr": 1.0858926384936297e-06, "epoch": 18.305613305613306, "percentage": 91.53, "elapsed_time": "1:24:50", "remaining_time": "0:07:51", "throughput": 1318.78, "total_tokens": 6713752}
7063
+ {"current_steps": 35225, "total_steps": 38480, "loss": 0.1639, "lr": 1.0825894793143721e-06, "epoch": 18.308212058212057, "percentage": 91.54, "elapsed_time": "1:24:51", "remaining_time": "0:07:50", "throughput": 1318.79, "total_tokens": 6714744}
7064
+ {"current_steps": 35230, "total_steps": 38480, "loss": 0.0982, "lr": 1.0792912405164084e-06, "epoch": 18.31081081081081, "percentage": 91.55, "elapsed_time": "1:24:52", "remaining_time": "0:07:49", "throughput": 1318.79, "total_tokens": 6715672}
7065
+ {"current_steps": 35235, "total_steps": 38480, "loss": 0.1747, "lr": 1.0759979227782658e-06, "epoch": 18.313409563409564, "percentage": 91.57, "elapsed_time": "1:24:52", "remaining_time": "0:07:49", "throughput": 1318.79, "total_tokens": 6716600}
7066
+ {"current_steps": 35240, "total_steps": 38480, "loss": 0.1047, "lr": 1.072709526777463e-06, "epoch": 18.316008316008315, "percentage": 91.58, "elapsed_time": "1:24:53", "remaining_time": "0:07:48", "throughput": 1318.8, "total_tokens": 6717560}
7067
+ {"current_steps": 35245, "total_steps": 38480, "loss": 0.1662, "lr": 1.0694260531904948e-06, "epoch": 18.31860706860707, "percentage": 91.59, "elapsed_time": "1:24:54", "remaining_time": "0:07:47", "throughput": 1318.82, "total_tokens": 6718584}
7068
+ {"current_steps": 35250, "total_steps": 38480, "loss": 0.0789, "lr": 1.0661475026928492e-06, "epoch": 18.321205821205822, "percentage": 91.61, "elapsed_time": "1:24:55", "remaining_time": "0:07:46", "throughput": 1318.83, "total_tokens": 6719576}
7069
+ {"current_steps": 35255, "total_steps": 38480, "loss": 0.195, "lr": 1.0628738759590084e-06, "epoch": 18.323804573804573, "percentage": 91.62, "elapsed_time": "1:24:55", "remaining_time": "0:07:46", "throughput": 1318.84, "total_tokens": 6720504}
7070
+ {"current_steps": 35260, "total_steps": 38480, "loss": 0.2091, "lr": 1.0596051736624367e-06, "epoch": 18.326403326403327, "percentage": 91.63, "elapsed_time": "1:24:56", "remaining_time": "0:07:45", "throughput": 1318.86, "total_tokens": 6721528}
7071
+ {"current_steps": 35265, "total_steps": 38480, "loss": 0.1224, "lr": 1.0563413964755742e-06, "epoch": 18.32900207900208, "percentage": 91.65, "elapsed_time": "1:24:57", "remaining_time": "0:07:44", "throughput": 1318.87, "total_tokens": 6722520}
7072
+ {"current_steps": 35270, "total_steps": 38480, "loss": 0.1774, "lr": 1.053082545069864e-06, "epoch": 18.33160083160083, "percentage": 91.66, "elapsed_time": "1:24:57", "remaining_time": "0:07:43", "throughput": 1318.86, "total_tokens": 6723384}
7073
+ {"current_steps": 35275, "total_steps": 38480, "loss": 0.117, "lr": 1.0498286201157281e-06, "epoch": 18.334199584199585, "percentage": 91.67, "elapsed_time": "1:24:58", "remaining_time": "0:07:43", "throughput": 1318.85, "total_tokens": 6724248}
7074
+ {"current_steps": 35280, "total_steps": 38480, "loss": 0.2396, "lr": 1.0465796222825785e-06, "epoch": 18.33679833679834, "percentage": 91.68, "elapsed_time": "1:24:59", "remaining_time": "0:07:42", "throughput": 1318.86, "total_tokens": 6725208}
7075
+ {"current_steps": 35285, "total_steps": 38480, "loss": 0.2517, "lr": 1.0433355522388083e-06, "epoch": 18.33939708939709, "percentage": 91.7, "elapsed_time": "1:24:59", "remaining_time": "0:07:41", "throughput": 1318.88, "total_tokens": 6726232}
7076
+ {"current_steps": 35290, "total_steps": 38480, "loss": 0.0882, "lr": 1.040096410651803e-06, "epoch": 18.341995841995843, "percentage": 91.71, "elapsed_time": "1:25:00", "remaining_time": "0:07:41", "throughput": 1318.9, "total_tokens": 6727288}
7077
+ {"current_steps": 35295, "total_steps": 38480, "loss": 0.1484, "lr": 1.0368621981879296e-06, "epoch": 18.344594594594593, "percentage": 91.72, "elapsed_time": "1:25:01", "remaining_time": "0:07:40", "throughput": 1318.9, "total_tokens": 6728216}
7078
+ {"current_steps": 35300, "total_steps": 38480, "loss": 0.1742, "lr": 1.0336329155125418e-06, "epoch": 18.347193347193347, "percentage": 91.74, "elapsed_time": "1:25:02", "remaining_time": "0:07:39", "throughput": 1318.91, "total_tokens": 6729176}
7079
+ {"current_steps": 35305, "total_steps": 38480, "loss": 0.1338, "lr": 1.0304085632899779e-06, "epoch": 18.3497920997921, "percentage": 91.75, "elapsed_time": "1:25:02", "remaining_time": "0:07:38", "throughput": 1318.92, "total_tokens": 6730136}
7080
+ {"current_steps": 35310, "total_steps": 38480, "loss": 0.291, "lr": 1.027189142183574e-06, "epoch": 18.35239085239085, "percentage": 91.76, "elapsed_time": "1:25:03", "remaining_time": "0:07:38", "throughput": 1318.92, "total_tokens": 6731096}
7081
+ {"current_steps": 35315, "total_steps": 38480, "loss": 0.2252, "lr": 1.0239746528556303e-06, "epoch": 18.354989604989605, "percentage": 91.77, "elapsed_time": "1:25:04", "remaining_time": "0:07:37", "throughput": 1318.93, "total_tokens": 6732056}
7082
+ {"current_steps": 35320, "total_steps": 38480, "loss": 0.1381, "lr": 1.0207650959674492e-06, "epoch": 18.35758835758836, "percentage": 91.79, "elapsed_time": "1:25:04", "remaining_time": "0:07:36", "throughput": 1318.95, "total_tokens": 6733080}
7083
+ {"current_steps": 35325, "total_steps": 38480, "loss": 0.1287, "lr": 1.0175604721793158e-06, "epoch": 18.36018711018711, "percentage": 91.8, "elapsed_time": "1:25:05", "remaining_time": "0:07:35", "throughput": 1318.94, "total_tokens": 6733944}
7084
+ {"current_steps": 35330, "total_steps": 38480, "loss": 0.0873, "lr": 1.014360782150492e-06, "epoch": 18.362785862785863, "percentage": 91.81, "elapsed_time": "1:25:06", "remaining_time": "0:07:35", "throughput": 1318.93, "total_tokens": 6734840}
7085
+ {"current_steps": 35335, "total_steps": 38480, "loss": 0.0695, "lr": 1.0111660265392287e-06, "epoch": 18.365384615384617, "percentage": 91.83, "elapsed_time": "1:25:06", "remaining_time": "0:07:34", "throughput": 1318.94, "total_tokens": 6735768}
7086
+ {"current_steps": 35340, "total_steps": 38480, "loss": 0.0786, "lr": 1.0079762060027698e-06, "epoch": 18.367983367983367, "percentage": 91.84, "elapsed_time": "1:25:07", "remaining_time": "0:07:33", "throughput": 1318.94, "total_tokens": 6736696}
7087
+ {"current_steps": 35345, "total_steps": 38480, "loss": 0.1621, "lr": 1.0047913211973376e-06, "epoch": 18.37058212058212, "percentage": 91.85, "elapsed_time": "1:25:08", "remaining_time": "0:07:33", "throughput": 1318.93, "total_tokens": 6737592}
7088
+ {"current_steps": 35350, "total_steps": 38480, "loss": 0.0567, "lr": 1.0016113727781328e-06, "epoch": 18.373180873180875, "percentage": 91.87, "elapsed_time": "1:25:09", "remaining_time": "0:07:32", "throughput": 1318.96, "total_tokens": 6738648}
7089
+ {"current_steps": 35355, "total_steps": 38480, "loss": 0.1294, "lr": 9.984363613993463e-07, "epoch": 18.375779625779625, "percentage": 91.88, "elapsed_time": "1:25:09", "remaining_time": "0:07:31", "throughput": 1318.98, "total_tokens": 6739672}
7090
+ {"current_steps": 35360, "total_steps": 38480, "loss": 0.0834, "lr": 9.95266287714161e-07, "epoch": 18.37837837837838, "percentage": 91.89, "elapsed_time": "1:25:10", "remaining_time": "0:07:30", "throughput": 1318.99, "total_tokens": 6740664}
7091
+ {"current_steps": 35365, "total_steps": 38480, "loss": 0.1138, "lr": 9.92101152374733e-07, "epoch": 18.38097713097713, "percentage": 91.9, "elapsed_time": "1:25:11", "remaining_time": "0:07:30", "throughput": 1318.99, "total_tokens": 6741560}
7092
+ {"current_steps": 35370, "total_steps": 38480, "loss": 0.1275, "lr": 9.889409560322028e-07, "epoch": 18.383575883575883, "percentage": 91.92, "elapsed_time": "1:25:11", "remaining_time": "0:07:29", "throughput": 1319.0, "total_tokens": 6742552}
7093
+ {"current_steps": 35375, "total_steps": 38480, "loss": 0.0316, "lr": 9.857856993366999e-07, "epoch": 18.386174636174637, "percentage": 91.93, "elapsed_time": "1:25:12", "remaining_time": "0:07:28", "throughput": 1319.0, "total_tokens": 6743480}
7094
+ {"current_steps": 35380, "total_steps": 38480, "loss": 0.1322, "lr": 9.826353829373387e-07, "epoch": 18.388773388773387, "percentage": 91.94, "elapsed_time": "1:25:13", "remaining_time": "0:07:28", "throughput": 1319.0, "total_tokens": 6744408}
7095
+ {"current_steps": 35385, "total_steps": 38480, "loss": 0.2115, "lr": 9.794900074822143e-07, "epoch": 18.39137214137214, "percentage": 91.96, "elapsed_time": "1:25:13", "remaining_time": "0:07:27", "throughput": 1319.01, "total_tokens": 6745368}
7096
+ {"current_steps": 35390, "total_steps": 38480, "loss": 0.2883, "lr": 9.763495736184014e-07, "epoch": 18.393970893970895, "percentage": 91.97, "elapsed_time": "1:25:14", "remaining_time": "0:07:26", "throughput": 1319.01, "total_tokens": 6746264}
7097
+ {"current_steps": 35395, "total_steps": 38480, "loss": 0.222, "lr": 9.73214081991966e-07, "epoch": 18.396569646569645, "percentage": 91.98, "elapsed_time": "1:25:15", "remaining_time": "0:07:25", "throughput": 1319.03, "total_tokens": 6747320}
7098
+ {"current_steps": 35400, "total_steps": 38480, "loss": 0.1149, "lr": 9.70083533247948e-07, "epoch": 18.3991683991684, "percentage": 92.0, "elapsed_time": "1:25:16", "remaining_time": "0:07:25", "throughput": 1319.02, "total_tokens": 6748216}
7099
+ {"current_steps": 35405, "total_steps": 38480, "loss": 0.0902, "lr": 9.66957928030382e-07, "epoch": 18.401767151767153, "percentage": 92.01, "elapsed_time": "1:25:16", "remaining_time": "0:07:24", "throughput": 1319.04, "total_tokens": 6749208}
7100
+ {"current_steps": 35410, "total_steps": 38480, "loss": 0.083, "lr": 9.638372669822733e-07, "epoch": 18.404365904365903, "percentage": 92.02, "elapsed_time": "1:25:17", "remaining_time": "0:07:23", "throughput": 1319.04, "total_tokens": 6750136}
7101
+ {"current_steps": 35415, "total_steps": 38480, "loss": 0.1334, "lr": 9.607215507456247e-07, "epoch": 18.406964656964657, "percentage": 92.03, "elapsed_time": "1:25:18", "remaining_time": "0:07:22", "throughput": 1319.04, "total_tokens": 6751064}
7102
+ {"current_steps": 35420, "total_steps": 38480, "loss": 0.1866, "lr": 9.576107799614042e-07, "epoch": 18.40956340956341, "percentage": 92.05, "elapsed_time": "1:25:18", "remaining_time": "0:07:22", "throughput": 1319.04, "total_tokens": 6751992}
7103
+ {"current_steps": 35425, "total_steps": 38480, "loss": 0.2215, "lr": 9.545049552695805e-07, "epoch": 18.41216216216216, "percentage": 92.06, "elapsed_time": "1:25:19", "remaining_time": "0:07:21", "throughput": 1319.05, "total_tokens": 6752952}
7104
+ {"current_steps": 35430, "total_steps": 38480, "loss": 0.0801, "lr": 9.514040773090843e-07, "epoch": 18.414760914760915, "percentage": 92.07, "elapsed_time": "1:25:20", "remaining_time": "0:07:20", "throughput": 1319.06, "total_tokens": 6753944}
7105
+ {"current_steps": 35435, "total_steps": 38480, "loss": 0.1077, "lr": 9.483081467178523e-07, "epoch": 18.41735966735967, "percentage": 92.09, "elapsed_time": "1:25:20", "remaining_time": "0:07:20", "throughput": 1319.07, "total_tokens": 6754904}
7106
+ {"current_steps": 35440, "total_steps": 38480, "loss": 0.0793, "lr": 9.452171641327834e-07, "epoch": 18.41995841995842, "percentage": 92.1, "elapsed_time": "1:25:21", "remaining_time": "0:07:19", "throughput": 1319.07, "total_tokens": 6755832}
7107
+ {"current_steps": 35445, "total_steps": 38480, "loss": 0.1457, "lr": 9.421311301897662e-07, "epoch": 18.422557172557173, "percentage": 92.11, "elapsed_time": "1:25:22", "remaining_time": "0:07:18", "throughput": 1319.08, "total_tokens": 6756824}
7108
+ {"current_steps": 35450, "total_steps": 38480, "loss": 0.1325, "lr": 9.390500455236762e-07, "epoch": 18.425155925155924, "percentage": 92.13, "elapsed_time": "1:25:23", "remaining_time": "0:07:17", "throughput": 1319.08, "total_tokens": 6757752}
7109
+ {"current_steps": 35455, "total_steps": 38480, "loss": 0.1068, "lr": 9.359739107683673e-07, "epoch": 18.427754677754677, "percentage": 92.14, "elapsed_time": "1:25:23", "remaining_time": "0:07:17", "throughput": 1319.1, "total_tokens": 6758776}
7110
+ {"current_steps": 35460, "total_steps": 38480, "loss": 0.1114, "lr": 9.329027265566643e-07, "epoch": 18.43035343035343, "percentage": 92.15, "elapsed_time": "1:25:24", "remaining_time": "0:07:16", "throughput": 1319.12, "total_tokens": 6759768}
7111
+ {"current_steps": 35465, "total_steps": 38480, "loss": 0.0304, "lr": 9.298364935203918e-07, "epoch": 18.43295218295218, "percentage": 92.16, "elapsed_time": "1:25:25", "remaining_time": "0:07:15", "throughput": 1319.12, "total_tokens": 6760696}
7112
+ {"current_steps": 35470, "total_steps": 38480, "loss": 0.0956, "lr": 9.267752122903428e-07, "epoch": 18.435550935550935, "percentage": 92.18, "elapsed_time": "1:25:25", "remaining_time": "0:07:14", "throughput": 1319.11, "total_tokens": 6761592}
7113
+ {"current_steps": 35475, "total_steps": 38480, "loss": 0.1407, "lr": 9.237188834962995e-07, "epoch": 18.43814968814969, "percentage": 92.19, "elapsed_time": "1:25:26", "remaining_time": "0:07:14", "throughput": 1319.13, "total_tokens": 6762584}
7114
+ {"current_steps": 35480, "total_steps": 38480, "loss": 0.1789, "lr": 9.206675077670146e-07, "epoch": 18.44074844074844, "percentage": 92.2, "elapsed_time": "1:25:27", "remaining_time": "0:07:13", "throughput": 1319.14, "total_tokens": 6763576}
7115
+ {"current_steps": 35485, "total_steps": 38480, "loss": 0.1217, "lr": 9.176210857302331e-07, "epoch": 18.443347193347194, "percentage": 92.22, "elapsed_time": "1:25:27", "remaining_time": "0:07:12", "throughput": 1319.14, "total_tokens": 6764504}
7116
+ {"current_steps": 35490, "total_steps": 38480, "loss": 0.0877, "lr": 9.145796180126814e-07, "epoch": 18.445945945945947, "percentage": 92.23, "elapsed_time": "1:25:28", "remaining_time": "0:07:12", "throughput": 1319.14, "total_tokens": 6765432}
7117
+ {"current_steps": 35495, "total_steps": 38480, "loss": 0.2149, "lr": 9.115431052400536e-07, "epoch": 18.448544698544698, "percentage": 92.24, "elapsed_time": "1:25:29", "remaining_time": "0:07:11", "throughput": 1319.14, "total_tokens": 6766360}
7118
+ {"current_steps": 35500, "total_steps": 38480, "loss": 0.1786, "lr": 9.085115480370332e-07, "epoch": 18.45114345114345, "percentage": 92.26, "elapsed_time": "1:25:30", "remaining_time": "0:07:10", "throughput": 1319.15, "total_tokens": 6767288}
7119
+ {"current_steps": 35505, "total_steps": 38480, "loss": 0.1112, "lr": 9.054849470272909e-07, "epoch": 18.453742203742205, "percentage": 92.27, "elapsed_time": "1:25:30", "remaining_time": "0:07:09", "throughput": 1319.15, "total_tokens": 6768248}
7120
+ {"current_steps": 35510, "total_steps": 38480, "loss": 0.1077, "lr": 9.024633028334673e-07, "epoch": 18.456340956340956, "percentage": 92.28, "elapsed_time": "1:25:31", "remaining_time": "0:07:09", "throughput": 1319.15, "total_tokens": 6769176}
7121
+ {"current_steps": 35515, "total_steps": 38480, "loss": 0.1414, "lr": 8.994466160771847e-07, "epoch": 18.45893970893971, "percentage": 92.29, "elapsed_time": "1:25:32", "remaining_time": "0:07:08", "throughput": 1319.15, "total_tokens": 6770104}
7122
+ {"current_steps": 35520, "total_steps": 38480, "loss": 0.1661, "lr": 8.964348873790496e-07, "epoch": 18.46153846153846, "percentage": 92.31, "elapsed_time": "1:25:32", "remaining_time": "0:07:07", "throughput": 1319.17, "total_tokens": 6771096}
7123
+ {"current_steps": 35525, "total_steps": 38480, "loss": 0.1094, "lr": 8.934281173586467e-07, "epoch": 18.464137214137214, "percentage": 92.32, "elapsed_time": "1:25:33", "remaining_time": "0:07:07", "throughput": 1319.19, "total_tokens": 6772120}
7124
+ {"current_steps": 35530, "total_steps": 38480, "loss": 0.0814, "lr": 8.90426306634537e-07, "epoch": 18.466735966735968, "percentage": 92.33, "elapsed_time": "1:25:34", "remaining_time": "0:07:06", "throughput": 1319.2, "total_tokens": 6773080}
7125
+ {"current_steps": 35535, "total_steps": 38480, "loss": 0.0557, "lr": 8.874294558242624e-07, "epoch": 18.469334719334718, "percentage": 92.35, "elapsed_time": "1:25:34", "remaining_time": "0:07:05", "throughput": 1319.2, "total_tokens": 6774008}
7126
+ {"current_steps": 35540, "total_steps": 38480, "loss": 0.1897, "lr": 8.844375655443549e-07, "epoch": 18.471933471933472, "percentage": 92.36, "elapsed_time": "1:25:35", "remaining_time": "0:07:04", "throughput": 1319.21, "total_tokens": 6775000}
7127
+ {"current_steps": 35545, "total_steps": 38480, "loss": 0.1654, "lr": 8.814506364103137e-07, "epoch": 18.474532224532226, "percentage": 92.37, "elapsed_time": "1:25:36", "remaining_time": "0:07:04", "throughput": 1319.22, "total_tokens": 6775992}
7128
+ {"current_steps": 35550, "total_steps": 38480, "loss": 0.0944, "lr": 8.784686690366223e-07, "epoch": 18.477130977130976, "percentage": 92.39, "elapsed_time": "1:25:37", "remaining_time": "0:07:03", "throughput": 1319.23, "total_tokens": 6776952}
7129
+ {"current_steps": 35555, "total_steps": 38480, "loss": 0.1247, "lr": 8.754916640367344e-07, "epoch": 18.47972972972973, "percentage": 92.4, "elapsed_time": "1:25:37", "remaining_time": "0:07:02", "throughput": 1319.25, "total_tokens": 6777944}
7130
+ {"current_steps": 35560, "total_steps": 38480, "loss": 0.1119, "lr": 8.725196220231019e-07, "epoch": 18.482328482328484, "percentage": 92.41, "elapsed_time": "1:25:38", "remaining_time": "0:07:01", "throughput": 1319.25, "total_tokens": 6778904}
7131
+ {"current_steps": 35565, "total_steps": 38480, "loss": 0.1252, "lr": 8.695525436071412e-07, "epoch": 18.484927234927234, "percentage": 92.42, "elapsed_time": "1:25:39", "remaining_time": "0:07:01", "throughput": 1319.28, "total_tokens": 6779960}
7132
+ {"current_steps": 35570, "total_steps": 38480, "loss": 0.1892, "lr": 8.665904293992472e-07, "epoch": 18.487525987525988, "percentage": 92.44, "elapsed_time": "1:25:39", "remaining_time": "0:07:00", "throughput": 1319.28, "total_tokens": 6780888}
7133
+ {"current_steps": 35575, "total_steps": 38480, "loss": 0.1668, "lr": 8.63633280008802e-07, "epoch": 18.49012474012474, "percentage": 92.45, "elapsed_time": "1:25:40", "remaining_time": "0:06:59", "throughput": 1319.28, "total_tokens": 6781816}
7134
+ {"current_steps": 35580, "total_steps": 38480, "loss": 0.0851, "lr": 8.606810960441608e-07, "epoch": 18.492723492723492, "percentage": 92.46, "elapsed_time": "1:25:41", "remaining_time": "0:06:59", "throughput": 1319.3, "total_tokens": 6782808}
7135
+ {"current_steps": 35585, "total_steps": 38480, "loss": 0.1084, "lr": 8.57733878112657e-07, "epoch": 18.495322245322246, "percentage": 92.48, "elapsed_time": "1:25:41", "remaining_time": "0:06:58", "throughput": 1319.32, "total_tokens": 6783832}
7136
+ {"current_steps": 35590, "total_steps": 38480, "loss": 0.1017, "lr": 8.547916268206058e-07, "epoch": 18.497920997920996, "percentage": 92.49, "elapsed_time": "1:25:42", "remaining_time": "0:06:57", "throughput": 1319.31, "total_tokens": 6784728}
7137
+ {"current_steps": 35595, "total_steps": 38480, "loss": 0.0791, "lr": 8.51854342773295e-07, "epoch": 18.50051975051975, "percentage": 92.5, "elapsed_time": "1:25:43", "remaining_time": "0:06:56", "throughput": 1319.32, "total_tokens": 6785688}
7138
+ {"current_steps": 35600, "total_steps": 38480, "loss": 0.1341, "lr": 8.489220265749942e-07, "epoch": 18.503118503118504, "percentage": 92.52, "elapsed_time": "1:25:44", "remaining_time": "0:06:56", "throughput": 1319.33, "total_tokens": 6786680}
7139
+ {"current_steps": 35605, "total_steps": 38480, "loss": 0.1342, "lr": 8.459946788289513e-07, "epoch": 18.505717255717254, "percentage": 92.53, "elapsed_time": "1:25:44", "remaining_time": "0:06:55", "throughput": 1319.33, "total_tokens": 6787576}
7140
+ {"current_steps": 35610, "total_steps": 38480, "loss": 0.1567, "lr": 8.43072300137393e-07, "epoch": 18.508316008316008, "percentage": 92.54, "elapsed_time": "1:25:45", "remaining_time": "0:06:54", "throughput": 1319.33, "total_tokens": 6788504}
7141
+ {"current_steps": 35615, "total_steps": 38480, "loss": 0.0969, "lr": 8.401548911015245e-07, "epoch": 18.510914760914762, "percentage": 92.55, "elapsed_time": "1:25:46", "remaining_time": "0:06:53", "throughput": 1319.33, "total_tokens": 6789432}
7142
+ {"current_steps": 35620, "total_steps": 38480, "loss": 0.0782, "lr": 8.372424523215211e-07, "epoch": 18.513513513513512, "percentage": 92.57, "elapsed_time": "1:25:46", "remaining_time": "0:06:53", "throughput": 1319.32, "total_tokens": 6790328}
7143
+ {"current_steps": 35625, "total_steps": 38480, "loss": 0.0765, "lr": 8.343349843965398e-07, "epoch": 18.516112266112266, "percentage": 92.58, "elapsed_time": "1:25:47", "remaining_time": "0:06:52", "throughput": 1319.32, "total_tokens": 6791224}
7144
+ {"current_steps": 35630, "total_steps": 38480, "loss": 0.0774, "lr": 8.314324879247243e-07, "epoch": 18.51871101871102, "percentage": 92.59, "elapsed_time": "1:25:48", "remaining_time": "0:06:51", "throughput": 1319.33, "total_tokens": 6792216}
7145
+ {"current_steps": 35635, "total_steps": 38480, "loss": 0.1545, "lr": 8.285349635031803e-07, "epoch": 18.52130977130977, "percentage": 92.61, "elapsed_time": "1:25:48", "remaining_time": "0:06:51", "throughput": 1319.33, "total_tokens": 6793144}
7146
+ {"current_steps": 35640, "total_steps": 38480, "loss": 0.111, "lr": 8.256424117280003e-07, "epoch": 18.523908523908524, "percentage": 92.62, "elapsed_time": "1:25:49", "remaining_time": "0:06:50", "throughput": 1319.34, "total_tokens": 6794072}
7147
+ {"current_steps": 35645, "total_steps": 38480, "loss": 0.1073, "lr": 8.227548331942475e-07, "epoch": 18.526507276507278, "percentage": 92.63, "elapsed_time": "1:25:50", "remaining_time": "0:06:49", "throughput": 1319.34, "total_tokens": 6795000}
7148
+ {"current_steps": 35650, "total_steps": 38480, "loss": 0.1047, "lr": 8.198722284959742e-07, "epoch": 18.52910602910603, "percentage": 92.65, "elapsed_time": "1:25:51", "remaining_time": "0:06:48", "throughput": 1319.34, "total_tokens": 6795928}
7149
+ {"current_steps": 35655, "total_steps": 38480, "loss": 0.072, "lr": 8.169945982261923e-07, "epoch": 18.531704781704782, "percentage": 92.66, "elapsed_time": "1:25:51", "remaining_time": "0:06:48", "throughput": 1319.34, "total_tokens": 6796856}
7150
+ {"current_steps": 35660, "total_steps": 38480, "loss": 0.0942, "lr": 8.141219429768975e-07, "epoch": 18.534303534303533, "percentage": 92.67, "elapsed_time": "1:25:52", "remaining_time": "0:06:47", "throughput": 1319.34, "total_tokens": 6797784}
7151
+ {"current_steps": 35665, "total_steps": 38480, "loss": 0.1594, "lr": 8.11254263339073e-07, "epoch": 18.536902286902286, "percentage": 92.68, "elapsed_time": "1:25:53", "remaining_time": "0:06:46", "throughput": 1319.35, "total_tokens": 6798744}
7152
+ {"current_steps": 35670, "total_steps": 38480, "loss": 0.1037, "lr": 8.083915599026632e-07, "epoch": 18.53950103950104, "percentage": 92.7, "elapsed_time": "1:25:53", "remaining_time": "0:06:46", "throughput": 1319.36, "total_tokens": 6799704}
7153
+ {"current_steps": 35675, "total_steps": 38480, "loss": 0.113, "lr": 8.055338332565943e-07, "epoch": 18.54209979209979, "percentage": 92.71, "elapsed_time": "1:25:54", "remaining_time": "0:06:45", "throughput": 1319.36, "total_tokens": 6800664}
7154
+ {"current_steps": 35680, "total_steps": 38480, "loss": 0.1504, "lr": 8.026810839887655e-07, "epoch": 18.544698544698544, "percentage": 92.72, "elapsed_time": "1:25:55", "remaining_time": "0:06:44", "throughput": 1319.38, "total_tokens": 6801656}
7155
+ {"current_steps": 35685, "total_steps": 38480, "loss": 0.0509, "lr": 7.99833312686063e-07, "epoch": 18.5472972972973, "percentage": 92.74, "elapsed_time": "1:25:55", "remaining_time": "0:06:43", "throughput": 1319.39, "total_tokens": 6802616}
7156
+ {"current_steps": 35690, "total_steps": 38480, "loss": 0.0704, "lr": 7.969905199343374e-07, "epoch": 18.54989604989605, "percentage": 92.75, "elapsed_time": "1:25:56", "remaining_time": "0:06:43", "throughput": 1319.4, "total_tokens": 6803608}
7157
+ {"current_steps": 35695, "total_steps": 38480, "loss": 0.1752, "lr": 7.941527063184157e-07, "epoch": 18.552494802494802, "percentage": 92.76, "elapsed_time": "1:25:57", "remaining_time": "0:06:42", "throughput": 1319.38, "total_tokens": 6804440}
7158
+ {"current_steps": 35700, "total_steps": 38480, "loss": 0.171, "lr": 7.913198724221111e-07, "epoch": 18.555093555093556, "percentage": 92.78, "elapsed_time": "1:25:57", "remaining_time": "0:06:41", "throughput": 1319.39, "total_tokens": 6805400}
7159
+ {"current_steps": 35705, "total_steps": 38480, "loss": 0.1036, "lr": 7.884920188281991e-07, "epoch": 18.557692307692307, "percentage": 92.79, "elapsed_time": "1:25:58", "remaining_time": "0:06:40", "throughput": 1319.39, "total_tokens": 6806328}
7160
+ {"current_steps": 35710, "total_steps": 38480, "loss": 0.2041, "lr": 7.856691461184396e-07, "epoch": 18.56029106029106, "percentage": 92.8, "elapsed_time": "1:25:59", "remaining_time": "0:06:40", "throughput": 1319.4, "total_tokens": 6807288}
7161
+ {"current_steps": 35715, "total_steps": 38480, "loss": 0.0582, "lr": 7.828512548735623e-07, "epoch": 18.562889812889814, "percentage": 92.81, "elapsed_time": "1:26:00", "remaining_time": "0:06:39", "throughput": 1319.41, "total_tokens": 6808248}
7162
+ {"current_steps": 35720, "total_steps": 38480, "loss": 0.0213, "lr": 7.800383456732757e-07, "epoch": 18.565488565488565, "percentage": 92.83, "elapsed_time": "1:26:00", "remaining_time": "0:06:38", "throughput": 1319.41, "total_tokens": 6809208}
7163
+ {"current_steps": 35725, "total_steps": 38480, "loss": 0.1544, "lr": 7.772304190962643e-07, "epoch": 18.56808731808732, "percentage": 92.84, "elapsed_time": "1:26:01", "remaining_time": "0:06:38", "throughput": 1319.42, "total_tokens": 6810168}
7164
+ {"current_steps": 35730, "total_steps": 38480, "loss": 0.091, "lr": 7.744274757201769e-07, "epoch": 18.570686070686072, "percentage": 92.85, "elapsed_time": "1:26:02", "remaining_time": "0:06:37", "throughput": 1319.43, "total_tokens": 6811128}
7165
+ {"current_steps": 35735, "total_steps": 38480, "loss": 0.0725, "lr": 7.71629516121658e-07, "epoch": 18.573284823284823, "percentage": 92.87, "elapsed_time": "1:26:02", "remaining_time": "0:06:36", "throughput": 1319.44, "total_tokens": 6812088}
7166
+ {"current_steps": 35740, "total_steps": 38480, "loss": 0.277, "lr": 7.68836540876311e-07, "epoch": 18.575883575883577, "percentage": 92.88, "elapsed_time": "1:26:03", "remaining_time": "0:06:35", "throughput": 1319.43, "total_tokens": 6812984}
7167
+ {"current_steps": 35745, "total_steps": 38480, "loss": 0.106, "lr": 7.660485505587122e-07, "epoch": 18.578482328482327, "percentage": 92.89, "elapsed_time": "1:26:04", "remaining_time": "0:06:35", "throughput": 1319.46, "total_tokens": 6814072}
7168
+ {"current_steps": 35750, "total_steps": 38480, "loss": 0.0755, "lr": 7.632655457424198e-07, "epoch": 18.58108108108108, "percentage": 92.91, "elapsed_time": "1:26:04", "remaining_time": "0:06:34", "throughput": 1319.47, "total_tokens": 6815032}
7169
+ {"current_steps": 35755, "total_steps": 38480, "loss": 0.14, "lr": 7.604875269999673e-07, "epoch": 18.583679833679835, "percentage": 92.92, "elapsed_time": "1:26:05", "remaining_time": "0:06:33", "throughput": 1319.47, "total_tokens": 6815928}
7170
+ {"current_steps": 35760, "total_steps": 38480, "loss": 0.1959, "lr": 7.577144949028558e-07, "epoch": 18.586278586278585, "percentage": 92.93, "elapsed_time": "1:26:06", "remaining_time": "0:06:32", "throughput": 1319.48, "total_tokens": 6816920}
7171
+ {"current_steps": 35765, "total_steps": 38480, "loss": 0.1326, "lr": 7.54946450021568e-07, "epoch": 18.58887733887734, "percentage": 92.94, "elapsed_time": "1:26:07", "remaining_time": "0:06:32", "throughput": 1319.49, "total_tokens": 6817912}
7172
+ {"current_steps": 35770, "total_steps": 38480, "loss": 0.0497, "lr": 7.521833929255512e-07, "epoch": 18.591476091476093, "percentage": 92.96, "elapsed_time": "1:26:07", "remaining_time": "0:06:31", "throughput": 1319.5, "total_tokens": 6818872}
7173
+ {"current_steps": 35775, "total_steps": 38480, "loss": 0.1641, "lr": 7.494253241832367e-07, "epoch": 18.594074844074843, "percentage": 92.97, "elapsed_time": "1:26:08", "remaining_time": "0:06:30", "throughput": 1319.52, "total_tokens": 6819896}
7174
+ {"current_steps": 35780, "total_steps": 38480, "loss": 0.1163, "lr": 7.466722443620261e-07, "epoch": 18.596673596673597, "percentage": 92.98, "elapsed_time": "1:26:09", "remaining_time": "0:06:30", "throughput": 1319.53, "total_tokens": 6820856}
7175
+ {"current_steps": 35785, "total_steps": 38480, "loss": 0.0774, "lr": 7.439241540282887e-07, "epoch": 18.59927234927235, "percentage": 93.0, "elapsed_time": "1:26:09", "remaining_time": "0:06:29", "throughput": 1319.53, "total_tokens": 6821816}
7176
+ {"current_steps": 35790, "total_steps": 38480, "loss": 0.1894, "lr": 7.411810537473752e-07, "epoch": 18.6018711018711, "percentage": 93.01, "elapsed_time": "1:26:10", "remaining_time": "0:06:28", "throughput": 1319.53, "total_tokens": 6822712}
7177
+ {"current_steps": 35795, "total_steps": 38480, "loss": 0.0795, "lr": 7.38442944083606e-07, "epoch": 18.604469854469855, "percentage": 93.02, "elapsed_time": "1:26:11", "remaining_time": "0:06:27", "throughput": 1319.53, "total_tokens": 6823640}
7178
+ {"current_steps": 35800, "total_steps": 38480, "loss": 0.0789, "lr": 7.357098256002726e-07, "epoch": 18.60706860706861, "percentage": 93.04, "elapsed_time": "1:26:11", "remaining_time": "0:06:27", "throughput": 1319.54, "total_tokens": 6824632}
7179
+ {"current_steps": 35805, "total_steps": 38480, "loss": 0.0881, "lr": 7.329816988596472e-07, "epoch": 18.60966735966736, "percentage": 93.05, "elapsed_time": "1:26:12", "remaining_time": "0:06:26", "throughput": 1319.56, "total_tokens": 6825624}
7180
+ {"current_steps": 35810, "total_steps": 38480, "loss": 0.2558, "lr": 7.30258564422967e-07, "epoch": 18.612266112266113, "percentage": 93.06, "elapsed_time": "1:26:13", "remaining_time": "0:06:25", "throughput": 1319.57, "total_tokens": 6826616}
7181
+ {"current_steps": 35815, "total_steps": 38480, "loss": 0.0548, "lr": 7.275404228504506e-07, "epoch": 18.614864864864863, "percentage": 93.07, "elapsed_time": "1:26:14", "remaining_time": "0:06:25", "throughput": 1319.58, "total_tokens": 6827608}
7182
+ {"current_steps": 35820, "total_steps": 38480, "loss": 0.172, "lr": 7.248272747012786e-07, "epoch": 18.617463617463617, "percentage": 93.09, "elapsed_time": "1:26:14", "remaining_time": "0:06:24", "throughput": 1319.6, "total_tokens": 6828632}
7183
+ {"current_steps": 35825, "total_steps": 38480, "loss": 0.2392, "lr": 7.221191205336125e-07, "epoch": 18.62006237006237, "percentage": 93.1, "elapsed_time": "1:26:15", "remaining_time": "0:06:23", "throughput": 1319.6, "total_tokens": 6829528}
7184
+ {"current_steps": 35830, "total_steps": 38480, "loss": 0.077, "lr": 7.194159609045875e-07, "epoch": 18.62266112266112, "percentage": 93.11, "elapsed_time": "1:26:16", "remaining_time": "0:06:22", "throughput": 1319.6, "total_tokens": 6830456}
7185
+ {"current_steps": 35835, "total_steps": 38480, "loss": 0.1063, "lr": 7.167177963703031e-07, "epoch": 18.625259875259875, "percentage": 93.13, "elapsed_time": "1:26:16", "remaining_time": "0:06:22", "throughput": 1319.61, "total_tokens": 6831416}
7186
+ {"current_steps": 35840, "total_steps": 38480, "loss": 0.1018, "lr": 7.140246274858347e-07, "epoch": 18.62785862785863, "percentage": 93.14, "elapsed_time": "1:26:17", "remaining_time": "0:06:21", "throughput": 1319.59, "total_tokens": 6832280}
7187
+ {"current_steps": 35845, "total_steps": 38480, "loss": 0.0969, "lr": 7.113364548052392e-07, "epoch": 18.63045738045738, "percentage": 93.15, "elapsed_time": "1:26:18", "remaining_time": "0:06:20", "throughput": 1319.61, "total_tokens": 6833272}
7188
+ {"current_steps": 35850, "total_steps": 38480, "loss": 0.1185, "lr": 7.086532788815354e-07, "epoch": 18.633056133056133, "percentage": 93.17, "elapsed_time": "1:26:18", "remaining_time": "0:06:19", "throughput": 1319.62, "total_tokens": 6834264}
7189
+ {"current_steps": 35855, "total_steps": 38480, "loss": 0.1952, "lr": 7.059751002667092e-07, "epoch": 18.635654885654887, "percentage": 93.18, "elapsed_time": "1:26:19", "remaining_time": "0:06:19", "throughput": 1319.63, "total_tokens": 6835256}
7190
+ {"current_steps": 35860, "total_steps": 38480, "loss": 0.0847, "lr": 7.033019195117313e-07, "epoch": 18.638253638253637, "percentage": 93.19, "elapsed_time": "1:26:20", "remaining_time": "0:06:18", "throughput": 1319.64, "total_tokens": 6836216}
7191
+ {"current_steps": 35865, "total_steps": 38480, "loss": 0.1113, "lr": 7.006337371665395e-07, "epoch": 18.64085239085239, "percentage": 93.2, "elapsed_time": "1:26:21", "remaining_time": "0:06:17", "throughput": 1319.65, "total_tokens": 6837208}
7192
+ {"current_steps": 35870, "total_steps": 38480, "loss": 0.1506, "lr": 6.979705537800418e-07, "epoch": 18.643451143451145, "percentage": 93.22, "elapsed_time": "1:26:21", "remaining_time": "0:06:17", "throughput": 1319.65, "total_tokens": 6838104}
7193
+ {"current_steps": 35875, "total_steps": 38480, "loss": 0.0612, "lr": 6.953123699001141e-07, "epoch": 18.646049896049895, "percentage": 93.23, "elapsed_time": "1:26:22", "remaining_time": "0:06:16", "throughput": 1319.65, "total_tokens": 6839032}
7194
+ {"current_steps": 35880, "total_steps": 38480, "loss": 0.1143, "lr": 6.926591860736131e-07, "epoch": 18.64864864864865, "percentage": 93.24, "elapsed_time": "1:26:23", "remaining_time": "0:06:15", "throughput": 1319.65, "total_tokens": 6839960}
7195
+ {"current_steps": 35885, "total_steps": 38480, "loss": 0.2283, "lr": 6.900110028463635e-07, "epoch": 18.651247401247403, "percentage": 93.26, "elapsed_time": "1:26:23", "remaining_time": "0:06:14", "throughput": 1319.65, "total_tokens": 6840920}
7196
+ {"current_steps": 35890, "total_steps": 38480, "loss": 0.3054, "lr": 6.873678207631573e-07, "epoch": 18.653846153846153, "percentage": 93.27, "elapsed_time": "1:26:24", "remaining_time": "0:06:14", "throughput": 1319.66, "total_tokens": 6841848}
7197
+ {"current_steps": 35895, "total_steps": 38480, "loss": 0.1143, "lr": 6.847296403677539e-07, "epoch": 18.656444906444907, "percentage": 93.28, "elapsed_time": "1:26:25", "remaining_time": "0:06:13", "throughput": 1319.64, "total_tokens": 6842680}
7198
+ {"current_steps": 35900, "total_steps": 38480, "loss": 0.1521, "lr": 6.820964622028997e-07, "epoch": 18.659043659043657, "percentage": 93.3, "elapsed_time": "1:26:25", "remaining_time": "0:06:12", "throughput": 1319.65, "total_tokens": 6843672}
7199
+ {"current_steps": 35905, "total_steps": 38480, "loss": 0.1616, "lr": 6.794682868103003e-07, "epoch": 18.66164241164241, "percentage": 93.31, "elapsed_time": "1:26:26", "remaining_time": "0:06:11", "throughput": 1319.65, "total_tokens": 6844600}
7200
+ {"current_steps": 35910, "total_steps": 38480, "loss": 0.0529, "lr": 6.768451147306287e-07, "epoch": 18.664241164241165, "percentage": 93.32, "elapsed_time": "1:26:27", "remaining_time": "0:06:11", "throughput": 1319.65, "total_tokens": 6845528}
7201
+ {"current_steps": 35915, "total_steps": 38480, "loss": 0.0679, "lr": 6.742269465035394e-07, "epoch": 18.666839916839916, "percentage": 93.33, "elapsed_time": "1:26:28", "remaining_time": "0:06:10", "throughput": 1319.63, "total_tokens": 6846360}
7202
+ {"current_steps": 35920, "total_steps": 38480, "loss": 0.1611, "lr": 6.716137826676461e-07, "epoch": 18.66943866943867, "percentage": 93.35, "elapsed_time": "1:26:28", "remaining_time": "0:06:09", "throughput": 1319.63, "total_tokens": 6847288}
7203
+ {"current_steps": 35925, "total_steps": 38480, "loss": 0.127, "lr": 6.690056237605464e-07, "epoch": 18.672037422037423, "percentage": 93.36, "elapsed_time": "1:26:29", "remaining_time": "0:06:09", "throughput": 1319.64, "total_tokens": 6848216}
7204
+ {"current_steps": 35930, "total_steps": 38480, "loss": 0.1934, "lr": 6.664024703187916e-07, "epoch": 18.674636174636174, "percentage": 93.37, "elapsed_time": "1:26:30", "remaining_time": "0:06:08", "throughput": 1319.64, "total_tokens": 6849176}
7205
+ {"current_steps": 35935, "total_steps": 38480, "loss": 0.0716, "lr": 6.638043228779229e-07, "epoch": 18.677234927234927, "percentage": 93.39, "elapsed_time": "1:26:30", "remaining_time": "0:06:07", "throughput": 1319.64, "total_tokens": 6850104}
7206
+ {"current_steps": 35940, "total_steps": 38480, "loss": 0.1106, "lr": 6.612111819724321e-07, "epoch": 18.67983367983368, "percentage": 93.4, "elapsed_time": "1:26:31", "remaining_time": "0:06:06", "throughput": 1319.66, "total_tokens": 6851096}
7207
+ {"current_steps": 35945, "total_steps": 38480, "loss": 0.0631, "lr": 6.586230481357953e-07, "epoch": 18.68243243243243, "percentage": 93.41, "elapsed_time": "1:26:32", "remaining_time": "0:06:06", "throughput": 1319.66, "total_tokens": 6852024}
7208
+ {"current_steps": 35950, "total_steps": 38480, "loss": 0.066, "lr": 6.560399219004504e-07, "epoch": 18.685031185031185, "percentage": 93.43, "elapsed_time": "1:26:32", "remaining_time": "0:06:05", "throughput": 1319.66, "total_tokens": 6852952}
7209
+ {"current_steps": 35955, "total_steps": 38480, "loss": 0.1836, "lr": 6.534618037978113e-07, "epoch": 18.68762993762994, "percentage": 93.44, "elapsed_time": "1:26:33", "remaining_time": "0:06:04", "throughput": 1319.66, "total_tokens": 6853880}
7210
+ {"current_steps": 35960, "total_steps": 38480, "loss": 0.1074, "lr": 6.508886943582565e-07, "epoch": 18.69022869022869, "percentage": 93.45, "elapsed_time": "1:26:34", "remaining_time": "0:06:04", "throughput": 1319.66, "total_tokens": 6854776}
7211
+ {"current_steps": 35965, "total_steps": 38480, "loss": 0.1215, "lr": 6.483205941111348e-07, "epoch": 18.692827442827443, "percentage": 93.46, "elapsed_time": "1:26:35", "remaining_time": "0:06:03", "throughput": 1319.67, "total_tokens": 6855736}
7212
+ {"current_steps": 35970, "total_steps": 38480, "loss": 0.1048, "lr": 6.457575035847652e-07, "epoch": 18.695426195426194, "percentage": 93.48, "elapsed_time": "1:26:35", "remaining_time": "0:06:02", "throughput": 1319.67, "total_tokens": 6856664}
7213
+ {"current_steps": 35975, "total_steps": 38480, "loss": 0.2111, "lr": 6.431994233064426e-07, "epoch": 18.698024948024948, "percentage": 93.49, "elapsed_time": "1:26:36", "remaining_time": "0:06:01", "throughput": 1319.69, "total_tokens": 6857688}
7214
+ {"current_steps": 35980, "total_steps": 38480, "loss": 0.0357, "lr": 6.406463538024182e-07, "epoch": 18.7006237006237, "percentage": 93.5, "elapsed_time": "1:26:37", "remaining_time": "0:06:01", "throughput": 1319.69, "total_tokens": 6858616}
7215
+ {"current_steps": 35985, "total_steps": 38480, "loss": 0.2537, "lr": 6.380982955979192e-07, "epoch": 18.703222453222452, "percentage": 93.52, "elapsed_time": "1:26:37", "remaining_time": "0:06:00", "throughput": 1319.68, "total_tokens": 6859512}
7216
+ {"current_steps": 35990, "total_steps": 38480, "loss": 0.0737, "lr": 6.355552492171485e-07, "epoch": 18.705821205821206, "percentage": 93.53, "elapsed_time": "1:26:38", "remaining_time": "0:05:59", "throughput": 1319.67, "total_tokens": 6860376}
7217
+ {"current_steps": 35995, "total_steps": 38480, "loss": 0.1429, "lr": 6.330172151832653e-07, "epoch": 18.70841995841996, "percentage": 93.54, "elapsed_time": "1:26:39", "remaining_time": "0:05:58", "throughput": 1319.68, "total_tokens": 6861368}
7218
+ {"current_steps": 36000, "total_steps": 38480, "loss": 0.145, "lr": 6.30484194018402e-07, "epoch": 18.71101871101871, "percentage": 93.56, "elapsed_time": "1:26:39", "remaining_time": "0:05:58", "throughput": 1319.68, "total_tokens": 6862264}
7219
+ {"current_steps": 36005, "total_steps": 38480, "loss": 0.1115, "lr": 6.279561862436667e-07, "epoch": 18.713617463617464, "percentage": 93.57, "elapsed_time": "1:26:40", "remaining_time": "0:05:57", "throughput": 1319.68, "total_tokens": 6863192}
7220
+ {"current_steps": 36010, "total_steps": 38480, "loss": 0.0731, "lr": 6.254331923791324e-07, "epoch": 18.716216216216218, "percentage": 93.58, "elapsed_time": "1:26:41", "remaining_time": "0:05:56", "throughput": 1319.7, "total_tokens": 6864184}
7221
+ {"current_steps": 36015, "total_steps": 38480, "loss": 0.1116, "lr": 6.229152129438309e-07, "epoch": 18.718814968814968, "percentage": 93.59, "elapsed_time": "1:26:42", "remaining_time": "0:05:56", "throughput": 1319.7, "total_tokens": 6865144}
7222
+ {"current_steps": 36020, "total_steps": 38480, "loss": 0.13, "lr": 6.204022484557759e-07, "epoch": 18.72141372141372, "percentage": 93.61, "elapsed_time": "1:26:42", "remaining_time": "0:05:55", "throughput": 1319.7, "total_tokens": 6866072}
7223
+ {"current_steps": 36025, "total_steps": 38480, "loss": 0.1272, "lr": 6.178942994319453e-07, "epoch": 18.724012474012476, "percentage": 93.62, "elapsed_time": "1:26:43", "remaining_time": "0:05:54", "throughput": 1319.71, "total_tokens": 6867000}
7224
+ {"current_steps": 36030, "total_steps": 38480, "loss": 0.1524, "lr": 6.153913663882821e-07, "epoch": 18.726611226611226, "percentage": 93.63, "elapsed_time": "1:26:44", "remaining_time": "0:05:53", "throughput": 1319.7, "total_tokens": 6867896}
7225
+ {"current_steps": 36035, "total_steps": 38480, "loss": 0.105, "lr": 6.128934498396993e-07, "epoch": 18.72920997920998, "percentage": 93.65, "elapsed_time": "1:26:44", "remaining_time": "0:05:53", "throughput": 1319.71, "total_tokens": 6868856}
7226
+ {"current_steps": 36040, "total_steps": 38480, "loss": 0.2882, "lr": 6.104005503000776e-07, "epoch": 18.731808731808734, "percentage": 93.66, "elapsed_time": "1:26:45", "remaining_time": "0:05:52", "throughput": 1319.71, "total_tokens": 6869816}
7227
+ {"current_steps": 36045, "total_steps": 38480, "loss": 0.2219, "lr": 6.079126682822677e-07, "epoch": 18.734407484407484, "percentage": 93.67, "elapsed_time": "1:26:46", "remaining_time": "0:05:51", "throughput": 1319.7, "total_tokens": 6870680}
7228
+ {"current_steps": 36050, "total_steps": 38480, "loss": 0.1363, "lr": 6.054298042980827e-07, "epoch": 18.737006237006238, "percentage": 93.69, "elapsed_time": "1:26:46", "remaining_time": "0:05:50", "throughput": 1319.71, "total_tokens": 6871640}
7229
+ {"current_steps": 36055, "total_steps": 38480, "loss": 0.1128, "lr": 6.029519588583082e-07, "epoch": 18.739604989604988, "percentage": 93.7, "elapsed_time": "1:26:47", "remaining_time": "0:05:50", "throughput": 1319.71, "total_tokens": 6872568}
7230
+ {"current_steps": 36060, "total_steps": 38480, "loss": 0.183, "lr": 6.004791324726978e-07, "epoch": 18.742203742203742, "percentage": 93.71, "elapsed_time": "1:26:48", "remaining_time": "0:05:49", "throughput": 1319.72, "total_tokens": 6873528}
7231
+ {"current_steps": 36065, "total_steps": 38480, "loss": 0.1227, "lr": 5.980113256499719e-07, "epoch": 18.744802494802496, "percentage": 93.72, "elapsed_time": "1:26:49", "remaining_time": "0:05:48", "throughput": 1319.73, "total_tokens": 6874488}
7232
+ {"current_steps": 36070, "total_steps": 38480, "loss": 0.1636, "lr": 5.955485388978083e-07, "epoch": 18.747401247401246, "percentage": 93.74, "elapsed_time": "1:26:49", "remaining_time": "0:05:48", "throughput": 1319.74, "total_tokens": 6875512}
7233
+ {"current_steps": 36075, "total_steps": 38480, "loss": 0.1597, "lr": 5.930907727228734e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "1:26:50", "remaining_time": "0:05:47", "throughput": 1319.75, "total_tokens": 6876440}
7234
+ {"current_steps": 36080, "total_steps": 38480, "loss": 0.1287, "lr": 5.906380276307827e-07, "epoch": 18.752598752598754, "percentage": 93.76, "elapsed_time": "1:26:51", "remaining_time": "0:05:46", "throughput": 1319.76, "total_tokens": 6877464}
7235
+ {"current_steps": 36085, "total_steps": 38480, "loss": 0.0434, "lr": 5.881903041261211e-07, "epoch": 18.755197505197504, "percentage": 93.78, "elapsed_time": "1:26:51", "remaining_time": "0:05:45", "throughput": 1319.77, "total_tokens": 6878424}
7236
+ {"current_steps": 36090, "total_steps": 38480, "loss": 0.0978, "lr": 5.85747602712447e-07, "epoch": 18.757796257796258, "percentage": 93.79, "elapsed_time": "1:26:52", "remaining_time": "0:05:45", "throughput": 1319.77, "total_tokens": 6879352}
7237
+ {"current_steps": 36095, "total_steps": 38480, "loss": 0.2583, "lr": 5.833099238922834e-07, "epoch": 18.760395010395012, "percentage": 93.8, "elapsed_time": "1:26:53", "remaining_time": "0:05:44", "throughput": 1319.77, "total_tokens": 6880280}
7238
+ {"current_steps": 36100, "total_steps": 38480, "loss": 0.0806, "lr": 5.808772681671182e-07, "epoch": 18.762993762993762, "percentage": 93.81, "elapsed_time": "1:26:53", "remaining_time": "0:05:43", "throughput": 1319.78, "total_tokens": 6881240}
7239
+ {"current_steps": 36105, "total_steps": 38480, "loss": 0.0877, "lr": 5.784496360374037e-07, "epoch": 18.765592515592516, "percentage": 93.83, "elapsed_time": "1:26:54", "remaining_time": "0:05:43", "throughput": 1319.78, "total_tokens": 6882168}
7240
+ {"current_steps": 36110, "total_steps": 38480, "loss": 0.0932, "lr": 5.760270280025653e-07, "epoch": 18.768191268191266, "percentage": 93.84, "elapsed_time": "1:26:55", "remaining_time": "0:05:42", "throughput": 1319.79, "total_tokens": 6883128}
7241
+ {"current_steps": 36115, "total_steps": 38480, "loss": 0.1874, "lr": 5.736094445609907e-07, "epoch": 18.77079002079002, "percentage": 93.85, "elapsed_time": "1:26:56", "remaining_time": "0:05:41", "throughput": 1319.79, "total_tokens": 6884056}
7242
+ {"current_steps": 36120, "total_steps": 38480, "loss": 0.1374, "lr": 5.711968862100319e-07, "epoch": 18.773388773388774, "percentage": 93.87, "elapsed_time": "1:26:56", "remaining_time": "0:05:40", "throughput": 1319.8, "total_tokens": 6885016}
7243
+ {"current_steps": 36125, "total_steps": 38480, "loss": 0.1625, "lr": 5.687893534460087e-07, "epoch": 18.775987525987524, "percentage": 93.88, "elapsed_time": "1:26:57", "remaining_time": "0:05:40", "throughput": 1319.8, "total_tokens": 6885944}
7244
+ {"current_steps": 36130, "total_steps": 38480, "loss": 0.1153, "lr": 5.663868467642136e-07, "epoch": 18.77858627858628, "percentage": 93.89, "elapsed_time": "1:26:58", "remaining_time": "0:05:39", "throughput": 1319.81, "total_tokens": 6886904}
7245
+ {"current_steps": 36135, "total_steps": 38480, "loss": 0.1598, "lr": 5.639893666588986e-07, "epoch": 18.781185031185032, "percentage": 93.91, "elapsed_time": "1:26:58", "remaining_time": "0:05:38", "throughput": 1319.82, "total_tokens": 6887864}
7246
+ {"current_steps": 36140, "total_steps": 38480, "loss": 0.0805, "lr": 5.615969136232774e-07, "epoch": 18.783783783783782, "percentage": 93.92, "elapsed_time": "1:26:59", "remaining_time": "0:05:37", "throughput": 1319.84, "total_tokens": 6888856}
7247
+ {"current_steps": 36145, "total_steps": 38480, "loss": 0.0694, "lr": 5.592094881495341e-07, "epoch": 18.786382536382536, "percentage": 93.93, "elapsed_time": "1:27:00", "remaining_time": "0:05:37", "throughput": 1319.83, "total_tokens": 6889752}
7248
+ {"current_steps": 36150, "total_steps": 38480, "loss": 0.2272, "lr": 5.568270907288287e-07, "epoch": 18.78898128898129, "percentage": 93.94, "elapsed_time": "1:27:00", "remaining_time": "0:05:36", "throughput": 1319.83, "total_tokens": 6890648}
7249
+ {"current_steps": 36155, "total_steps": 38480, "loss": 0.0811, "lr": 5.544497218512689e-07, "epoch": 18.79158004158004, "percentage": 93.96, "elapsed_time": "1:27:01", "remaining_time": "0:05:35", "throughput": 1319.83, "total_tokens": 6891608}
7250
+ {"current_steps": 36160, "total_steps": 38480, "loss": 0.1046, "lr": 5.520773820059361e-07, "epoch": 18.794178794178794, "percentage": 93.97, "elapsed_time": "1:27:02", "remaining_time": "0:05:35", "throughput": 1319.83, "total_tokens": 6892536}
7251
+ {"current_steps": 36165, "total_steps": 38480, "loss": 0.0546, "lr": 5.497100716808784e-07, "epoch": 18.796777546777548, "percentage": 93.98, "elapsed_time": "1:27:02", "remaining_time": "0:05:34", "throughput": 1319.84, "total_tokens": 6893496}
7252
+ {"current_steps": 36170, "total_steps": 38480, "loss": 0.0666, "lr": 5.47347791363112e-07, "epoch": 18.7993762993763, "percentage": 94.0, "elapsed_time": "1:27:03", "remaining_time": "0:05:33", "throughput": 1319.84, "total_tokens": 6894424}
7253
+ {"current_steps": 36175, "total_steps": 38480, "loss": 0.0909, "lr": 5.449905415386119e-07, "epoch": 18.801975051975052, "percentage": 94.01, "elapsed_time": "1:27:04", "remaining_time": "0:05:32", "throughput": 1319.86, "total_tokens": 6895416}
7254
+ {"current_steps": 36180, "total_steps": 38480, "loss": 0.1225, "lr": 5.426383226923154e-07, "epoch": 18.804573804573806, "percentage": 94.02, "elapsed_time": "1:27:05", "remaining_time": "0:05:32", "throughput": 1319.86, "total_tokens": 6896376}
7255
+ {"current_steps": 36185, "total_steps": 38480, "loss": 0.0506, "lr": 5.402911353081352e-07, "epoch": 18.807172557172557, "percentage": 94.04, "elapsed_time": "1:27:05", "remaining_time": "0:05:31", "throughput": 1319.87, "total_tokens": 6897336}
7256
+ {"current_steps": 36190, "total_steps": 38480, "loss": 0.1656, "lr": 5.379489798689435e-07, "epoch": 18.80977130977131, "percentage": 94.05, "elapsed_time": "1:27:06", "remaining_time": "0:05:30", "throughput": 1319.89, "total_tokens": 6898328}
7257
+ {"current_steps": 36195, "total_steps": 38480, "loss": 0.1036, "lr": 5.356118568565744e-07, "epoch": 18.81237006237006, "percentage": 94.06, "elapsed_time": "1:27:07", "remaining_time": "0:05:29", "throughput": 1319.89, "total_tokens": 6899256}
7258
+ {"current_steps": 36200, "total_steps": 38480, "loss": 0.1022, "lr": 5.332797667518346e-07, "epoch": 18.814968814968815, "percentage": 94.07, "elapsed_time": "1:27:07", "remaining_time": "0:05:29", "throughput": 1319.9, "total_tokens": 6900216}
7259
+ {"current_steps": 36205, "total_steps": 38480, "loss": 0.1031, "lr": 5.30952710034488e-07, "epoch": 18.81756756756757, "percentage": 94.09, "elapsed_time": "1:27:08", "remaining_time": "0:05:28", "throughput": 1319.92, "total_tokens": 6901240}
7260
+ {"current_steps": 36210, "total_steps": 38480, "loss": 0.1579, "lr": 5.286306871832653e-07, "epoch": 18.82016632016632, "percentage": 94.1, "elapsed_time": "1:27:09", "remaining_time": "0:05:27", "throughput": 1319.93, "total_tokens": 6902232}
7261
+ {"current_steps": 36215, "total_steps": 38480, "loss": 0.2262, "lr": 5.263136986758593e-07, "epoch": 18.822765072765073, "percentage": 94.11, "elapsed_time": "1:27:09", "remaining_time": "0:05:27", "throughput": 1319.95, "total_tokens": 6903224}
7262
+ {"current_steps": 36220, "total_steps": 38480, "loss": 0.1134, "lr": 5.240017449889361e-07, "epoch": 18.825363825363826, "percentage": 94.13, "elapsed_time": "1:27:10", "remaining_time": "0:05:26", "throughput": 1319.94, "total_tokens": 6904120}
7263
+ {"current_steps": 36225, "total_steps": 38480, "loss": 0.2843, "lr": 5.216948265981153e-07, "epoch": 18.827962577962577, "percentage": 94.14, "elapsed_time": "1:27:11", "remaining_time": "0:05:25", "throughput": 1319.95, "total_tokens": 6905112}
7264
+ {"current_steps": 36230, "total_steps": 38480, "loss": 0.0225, "lr": 5.193929439779865e-07, "epoch": 18.83056133056133, "percentage": 94.15, "elapsed_time": "1:27:12", "remaining_time": "0:05:24", "throughput": 1319.96, "total_tokens": 6906072}
7265
+ {"current_steps": 36235, "total_steps": 38480, "loss": 0.1872, "lr": 5.170960976020989e-07, "epoch": 18.833160083160084, "percentage": 94.17, "elapsed_time": "1:27:12", "remaining_time": "0:05:24", "throughput": 1319.97, "total_tokens": 6907032}
7266
+ {"current_steps": 36240, "total_steps": 38480, "loss": 0.1012, "lr": 5.148042879429771e-07, "epoch": 18.835758835758835, "percentage": 94.18, "elapsed_time": "1:27:13", "remaining_time": "0:05:23", "throughput": 1319.98, "total_tokens": 6907992}
7267
+ {"current_steps": 36245, "total_steps": 38480, "loss": 0.1512, "lr": 5.125175154720912e-07, "epoch": 18.83835758835759, "percentage": 94.19, "elapsed_time": "1:27:14", "remaining_time": "0:05:22", "throughput": 1319.98, "total_tokens": 6908952}
7268
+ {"current_steps": 36250, "total_steps": 38480, "loss": 0.2003, "lr": 5.102357806598845e-07, "epoch": 18.840956340956343, "percentage": 94.2, "elapsed_time": "1:27:14", "remaining_time": "0:05:22", "throughput": 1320.0, "total_tokens": 6909944}
7269
+ {"current_steps": 36255, "total_steps": 38480, "loss": 0.1705, "lr": 5.079590839757731e-07, "epoch": 18.843555093555093, "percentage": 94.22, "elapsed_time": "1:27:15", "remaining_time": "0:05:21", "throughput": 1319.99, "total_tokens": 6910840}
7270
+ {"current_steps": 36260, "total_steps": 38480, "loss": 0.2345, "lr": 5.056874258881211e-07, "epoch": 18.846153846153847, "percentage": 94.23, "elapsed_time": "1:27:16", "remaining_time": "0:05:20", "throughput": 1320.0, "total_tokens": 6911832}
7271
+ {"current_steps": 36265, "total_steps": 38480, "loss": 0.1088, "lr": 5.034208068642632e-07, "epoch": 18.848752598752597, "percentage": 94.24, "elapsed_time": "1:27:16", "remaining_time": "0:05:19", "throughput": 1320.0, "total_tokens": 6912760}
7272
+ {"current_steps": 36270, "total_steps": 38480, "loss": 0.062, "lr": 5.011592273704984e-07, "epoch": 18.85135135135135, "percentage": 94.26, "elapsed_time": "1:27:17", "remaining_time": "0:05:19", "throughput": 1320.01, "total_tokens": 6913720}
7273
+ {"current_steps": 36275, "total_steps": 38480, "loss": 0.1214, "lr": 4.989026878720882e-07, "epoch": 18.853950103950105, "percentage": 94.27, "elapsed_time": "1:27:18", "remaining_time": "0:05:18", "throughput": 1320.01, "total_tokens": 6914616}
7274
+ {"current_steps": 36280, "total_steps": 38480, "loss": 0.0922, "lr": 4.966511888332553e-07, "epoch": 18.856548856548855, "percentage": 94.28, "elapsed_time": "1:27:19", "remaining_time": "0:05:17", "throughput": 1320.02, "total_tokens": 6915576}
7275
+ {"current_steps": 36285, "total_steps": 38480, "loss": 0.1142, "lr": 4.944047307171851e-07, "epoch": 18.85914760914761, "percentage": 94.3, "elapsed_time": "1:27:19", "remaining_time": "0:05:16", "throughput": 1320.01, "total_tokens": 6916440}
7276
+ {"current_steps": 36290, "total_steps": 38480, "loss": 0.2162, "lr": 4.921633139860299e-07, "epoch": 18.861746361746363, "percentage": 94.31, "elapsed_time": "1:27:20", "remaining_time": "0:05:16", "throughput": 1320.01, "total_tokens": 6917400}
7277
+ {"current_steps": 36295, "total_steps": 38480, "loss": 0.2078, "lr": 4.899269391009042e-07, "epoch": 18.864345114345113, "percentage": 94.32, "elapsed_time": "1:27:21", "remaining_time": "0:05:15", "throughput": 1320.02, "total_tokens": 6918360}
7278
+ {"current_steps": 36300, "total_steps": 38480, "loss": 0.1161, "lr": 4.876956065218791e-07, "epoch": 18.866943866943867, "percentage": 94.33, "elapsed_time": "1:27:21", "remaining_time": "0:05:14", "throughput": 1320.03, "total_tokens": 6919352}
7279
+ {"current_steps": 36305, "total_steps": 38480, "loss": 0.0523, "lr": 4.854693167079982e-07, "epoch": 18.86954261954262, "percentage": 94.35, "elapsed_time": "1:27:22", "remaining_time": "0:05:14", "throughput": 1320.02, "total_tokens": 6920216}
7280
+ {"current_steps": 36310, "total_steps": 38480, "loss": 0.1412, "lr": 4.832480701172564e-07, "epoch": 18.87214137214137, "percentage": 94.36, "elapsed_time": "1:27:23", "remaining_time": "0:05:13", "throughput": 1320.04, "total_tokens": 6921240}
7281
+ {"current_steps": 36315, "total_steps": 38480, "loss": 0.2642, "lr": 4.810318672066216e-07, "epoch": 18.874740124740125, "percentage": 94.37, "elapsed_time": "1:27:23", "remaining_time": "0:05:12", "throughput": 1320.07, "total_tokens": 6922296}
7282
+ {"current_steps": 36320, "total_steps": 38480, "loss": 0.1963, "lr": 4.788207084320178e-07, "epoch": 18.87733887733888, "percentage": 94.39, "elapsed_time": "1:27:24", "remaining_time": "0:05:11", "throughput": 1320.06, "total_tokens": 6923192}
7283
+ {"current_steps": 36325, "total_steps": 38480, "loss": 0.1133, "lr": 4.7661459424833134e-07, "epoch": 18.87993762993763, "percentage": 94.4, "elapsed_time": "1:27:25", "remaining_time": "0:05:11", "throughput": 1320.08, "total_tokens": 6924216}
7284
+ {"current_steps": 36330, "total_steps": 38480, "loss": 0.1267, "lr": 4.744135251094184e-07, "epoch": 18.882536382536383, "percentage": 94.41, "elapsed_time": "1:27:25", "remaining_time": "0:05:10", "throughput": 1320.08, "total_tokens": 6925144}
7285
+ {"current_steps": 36335, "total_steps": 38480, "loss": 0.0569, "lr": 4.722175014680835e-07, "epoch": 18.885135135135137, "percentage": 94.43, "elapsed_time": "1:27:26", "remaining_time": "0:05:09", "throughput": 1320.09, "total_tokens": 6926104}
7286
+ {"current_steps": 36340, "total_steps": 38480, "loss": 0.1308, "lr": 4.7002652377610423e-07, "epoch": 18.887733887733887, "percentage": 94.44, "elapsed_time": "1:27:27", "remaining_time": "0:05:09", "throughput": 1320.1, "total_tokens": 6927064}
7287
+ {"current_steps": 36345, "total_steps": 38480, "loss": 0.155, "lr": 4.6784059248422276e-07, "epoch": 18.89033264033264, "percentage": 94.45, "elapsed_time": "1:27:28", "remaining_time": "0:05:08", "throughput": 1320.1, "total_tokens": 6927992}
7288
+ {"current_steps": 36350, "total_steps": 38480, "loss": 0.1634, "lr": 4.656597080421293e-07, "epoch": 18.89293139293139, "percentage": 94.46, "elapsed_time": "1:27:28", "remaining_time": "0:05:07", "throughput": 1320.1, "total_tokens": 6928952}
7289
+ {"current_steps": 36355, "total_steps": 38480, "loss": 0.1802, "lr": 4.6348387089848456e-07, "epoch": 18.895530145530145, "percentage": 94.48, "elapsed_time": "1:27:29", "remaining_time": "0:05:06", "throughput": 1320.11, "total_tokens": 6929912}
7290
+ {"current_steps": 36360, "total_steps": 38480, "loss": 0.1171, "lr": 4.613130815009137e-07, "epoch": 18.8981288981289, "percentage": 94.49, "elapsed_time": "1:27:30", "remaining_time": "0:05:06", "throughput": 1320.12, "total_tokens": 6930872}
7291
+ {"current_steps": 36365, "total_steps": 38480, "loss": 0.1129, "lr": 4.591473402959984e-07, "epoch": 18.90072765072765, "percentage": 94.5, "elapsed_time": "1:27:30", "remaining_time": "0:05:05", "throughput": 1320.11, "total_tokens": 6931768}
7292
+ {"current_steps": 36370, "total_steps": 38480, "loss": 0.1081, "lr": 4.56986647729285e-07, "epoch": 18.903326403326403, "percentage": 94.52, "elapsed_time": "1:27:31", "remaining_time": "0:05:04", "throughput": 1320.11, "total_tokens": 6932664}
7293
+ {"current_steps": 36375, "total_steps": 38480, "loss": 0.1643, "lr": 4.548310042452736e-07, "epoch": 18.905925155925157, "percentage": 94.53, "elapsed_time": "1:27:32", "remaining_time": "0:05:03", "throughput": 1320.11, "total_tokens": 6933592}
7294
+ {"current_steps": 36380, "total_steps": 38480, "loss": 0.1055, "lr": 4.5268041028743714e-07, "epoch": 18.908523908523907, "percentage": 94.54, "elapsed_time": "1:27:32", "remaining_time": "0:05:03", "throughput": 1320.11, "total_tokens": 6934520}
7295
+ {"current_steps": 36385, "total_steps": 38480, "loss": 0.1443, "lr": 4.505348662982023e-07, "epoch": 18.91112266112266, "percentage": 94.56, "elapsed_time": "1:27:33", "remaining_time": "0:05:02", "throughput": 1320.1, "total_tokens": 6935416}
7296
+ {"current_steps": 36390, "total_steps": 38480, "loss": 0.088, "lr": 4.483943727189577e-07, "epoch": 18.913721413721415, "percentage": 94.57, "elapsed_time": "1:27:34", "remaining_time": "0:05:01", "throughput": 1320.12, "total_tokens": 6936440}
7297
+ {"current_steps": 36395, "total_steps": 38480, "loss": 0.1386, "lr": 4.4625892999005383e-07, "epoch": 18.916320166320165, "percentage": 94.58, "elapsed_time": "1:27:35", "remaining_time": "0:05:01", "throughput": 1320.13, "total_tokens": 6937368}
7298
+ {"current_steps": 36400, "total_steps": 38480, "loss": 0.0768, "lr": 4.4412853855080317e-07, "epoch": 18.91891891891892, "percentage": 94.59, "elapsed_time": "1:27:35", "remaining_time": "0:05:00", "throughput": 1320.13, "total_tokens": 6938296}
7299
+ {"current_steps": 36405, "total_steps": 38480, "loss": 0.089, "lr": 4.4200319883948013e-07, "epoch": 18.921517671517673, "percentage": 94.61, "elapsed_time": "1:27:36", "remaining_time": "0:04:59", "throughput": 1320.13, "total_tokens": 6939256}
7300
+ {"current_steps": 36410, "total_steps": 38480, "loss": 0.1282, "lr": 4.398829112933128e-07, "epoch": 18.924116424116423, "percentage": 94.62, "elapsed_time": "1:27:37", "remaining_time": "0:04:58", "throughput": 1320.14, "total_tokens": 6940216}
7301
+ {"current_steps": 36415, "total_steps": 38480, "loss": 0.1177, "lr": 4.377676763484967e-07, "epoch": 18.926715176715177, "percentage": 94.63, "elapsed_time": "1:27:37", "remaining_time": "0:04:58", "throughput": 1320.14, "total_tokens": 6941144}
7302
+ {"current_steps": 36420, "total_steps": 38480, "loss": 0.2052, "lr": 4.356574944401892e-07, "epoch": 18.929313929313928, "percentage": 94.65, "elapsed_time": "1:27:38", "remaining_time": "0:04:57", "throughput": 1320.15, "total_tokens": 6942104}
7303
+ {"current_steps": 36425, "total_steps": 38480, "loss": 0.1373, "lr": 4.335523660025043e-07, "epoch": 18.93191268191268, "percentage": 94.66, "elapsed_time": "1:27:39", "remaining_time": "0:04:56", "throughput": 1320.15, "total_tokens": 6943032}
7304
+ {"current_steps": 36430, "total_steps": 38480, "loss": 0.0567, "lr": 4.314522914685121e-07, "epoch": 18.934511434511435, "percentage": 94.67, "elapsed_time": "1:27:39", "remaining_time": "0:04:55", "throughput": 1320.17, "total_tokens": 6944056}
7305
+ {"current_steps": 36435, "total_steps": 38480, "loss": 0.1092, "lr": 4.2935727127025314e-07, "epoch": 18.937110187110186, "percentage": 94.69, "elapsed_time": "1:27:40", "remaining_time": "0:04:55", "throughput": 1320.18, "total_tokens": 6945016}
7306
+ {"current_steps": 36440, "total_steps": 38480, "loss": 0.1147, "lr": 4.2726730583872165e-07, "epoch": 18.93970893970894, "percentage": 94.7, "elapsed_time": "1:27:41", "remaining_time": "0:04:54", "throughput": 1320.18, "total_tokens": 6945944}
7307
+ {"current_steps": 36445, "total_steps": 38480, "loss": 0.0596, "lr": 4.2518239560387086e-07, "epoch": 18.942307692307693, "percentage": 94.71, "elapsed_time": "1:27:42", "remaining_time": "0:04:53", "throughput": 1320.19, "total_tokens": 6946904}
7308
+ {"current_steps": 36450, "total_steps": 38480, "loss": 0.0631, "lr": 4.231025409946188e-07, "epoch": 18.944906444906444, "percentage": 94.72, "elapsed_time": "1:27:42", "remaining_time": "0:04:53", "throughput": 1320.19, "total_tokens": 6947864}
7309
+ {"current_steps": 36455, "total_steps": 38480, "loss": 0.2906, "lr": 4.210277424388426e-07, "epoch": 18.947505197505198, "percentage": 94.74, "elapsed_time": "1:27:43", "remaining_time": "0:04:52", "throughput": 1320.19, "total_tokens": 6948792}
7310
+ {"current_steps": 36460, "total_steps": 38480, "loss": 0.1581, "lr": 4.189580003633731e-07, "epoch": 18.95010395010395, "percentage": 94.75, "elapsed_time": "1:27:44", "remaining_time": "0:04:51", "throughput": 1320.21, "total_tokens": 6949816}
7311
+ {"current_steps": 36465, "total_steps": 38480, "loss": 0.0708, "lr": 4.1689331519400853e-07, "epoch": 18.9527027027027, "percentage": 94.76, "elapsed_time": "1:27:44", "remaining_time": "0:04:50", "throughput": 1320.2, "total_tokens": 6950680}
7312
+ {"current_steps": 36470, "total_steps": 38480, "loss": 0.1491, "lr": 4.1483368735550645e-07, "epoch": 18.955301455301456, "percentage": 94.78, "elapsed_time": "1:27:45", "remaining_time": "0:04:50", "throughput": 1320.21, "total_tokens": 6951640}
7313
+ {"current_steps": 36475, "total_steps": 38480, "loss": 0.1037, "lr": 4.127791172715778e-07, "epoch": 18.95790020790021, "percentage": 94.79, "elapsed_time": "1:27:46", "remaining_time": "0:04:49", "throughput": 1320.21, "total_tokens": 6952600}
7314
+ {"current_steps": 36480, "total_steps": 38480, "loss": 0.201, "lr": 4.107296053648985e-07, "epoch": 18.96049896049896, "percentage": 94.8, "elapsed_time": "1:27:46", "remaining_time": "0:04:48", "throughput": 1320.23, "total_tokens": 6953592}
7315
+ {"current_steps": 36485, "total_steps": 38480, "loss": 0.0782, "lr": 4.086851520570978e-07, "epoch": 18.963097713097714, "percentage": 94.82, "elapsed_time": "1:27:47", "remaining_time": "0:04:48", "throughput": 1320.22, "total_tokens": 6954488}
7316
+ {"current_steps": 36490, "total_steps": 38480, "loss": 0.1844, "lr": 4.066457577687727e-07, "epoch": 18.965696465696467, "percentage": 94.83, "elapsed_time": "1:27:48", "remaining_time": "0:04:47", "throughput": 1320.23, "total_tokens": 6955448}
7317
+ {"current_steps": 36495, "total_steps": 38480, "loss": 0.1865, "lr": 4.046114229194764e-07, "epoch": 18.968295218295218, "percentage": 94.84, "elapsed_time": "1:27:49", "remaining_time": "0:04:46", "throughput": 1320.24, "total_tokens": 6956440}
7318
+ {"current_steps": 36500, "total_steps": 38480, "loss": 0.0802, "lr": 4.0258214792771585e-07, "epoch": 18.97089397089397, "percentage": 94.85, "elapsed_time": "1:27:49", "remaining_time": "0:04:45", "throughput": 1320.24, "total_tokens": 6957336}
7319
+ {"current_steps": 36505, "total_steps": 38480, "loss": 0.1119, "lr": 4.0055793321096266e-07, "epoch": 18.973492723492722, "percentage": 94.87, "elapsed_time": "1:27:50", "remaining_time": "0:04:45", "throughput": 1320.25, "total_tokens": 6958328}
7320
+ {"current_steps": 36510, "total_steps": 38480, "loss": 0.0962, "lr": 3.9853877918564764e-07, "epoch": 18.976091476091476, "percentage": 94.88, "elapsed_time": "1:27:51", "remaining_time": "0:04:44", "throughput": 1320.27, "total_tokens": 6959384}
7321
+ {"current_steps": 36515, "total_steps": 38480, "loss": 0.1405, "lr": 3.965246862671579e-07, "epoch": 18.97869022869023, "percentage": 94.89, "elapsed_time": "1:27:51", "remaining_time": "0:04:43", "throughput": 1320.27, "total_tokens": 6960312}
7322
+ {"current_steps": 36520, "total_steps": 38480, "loss": 0.0805, "lr": 3.9451565486983976e-07, "epoch": 18.98128898128898, "percentage": 94.91, "elapsed_time": "1:27:52", "remaining_time": "0:04:42", "throughput": 1320.29, "total_tokens": 6961304}
7323
+ {"current_steps": 36525, "total_steps": 38480, "loss": 0.079, "lr": 3.925116854069988e-07, "epoch": 18.983887733887734, "percentage": 94.92, "elapsed_time": "1:27:53", "remaining_time": "0:04:42", "throughput": 1320.31, "total_tokens": 6962360}
7324
+ {"current_steps": 36530, "total_steps": 38480, "loss": 0.065, "lr": 3.905127782909024e-07, "epoch": 18.986486486486488, "percentage": 94.93, "elapsed_time": "1:27:53", "remaining_time": "0:04:41", "throughput": 1320.31, "total_tokens": 6963256}
7325
+ {"current_steps": 36535, "total_steps": 38480, "loss": 0.0723, "lr": 3.885189339327688e-07, "epoch": 18.989085239085238, "percentage": 94.95, "elapsed_time": "1:27:54", "remaining_time": "0:04:40", "throughput": 1320.31, "total_tokens": 6964216}
7326
+ {"current_steps": 36540, "total_steps": 38480, "loss": 0.0551, "lr": 3.8653015274278117e-07, "epoch": 18.991683991683992, "percentage": 94.96, "elapsed_time": "1:27:55", "remaining_time": "0:04:40", "throughput": 1320.31, "total_tokens": 6965144}
7327
+ {"current_steps": 36545, "total_steps": 38480, "loss": 0.1159, "lr": 3.845464351300787e-07, "epoch": 18.994282744282746, "percentage": 94.97, "elapsed_time": "1:27:56", "remaining_time": "0:04:39", "throughput": 1320.32, "total_tokens": 6966104}
7328
+ {"current_steps": 36550, "total_steps": 38480, "loss": 0.0774, "lr": 3.825677815027628e-07, "epoch": 18.996881496881496, "percentage": 94.98, "elapsed_time": "1:27:56", "remaining_time": "0:04:38", "throughput": 1320.32, "total_tokens": 6967032}
7329
+ {"current_steps": 36555, "total_steps": 38480, "loss": 0.0517, "lr": 3.8059419226788297e-07, "epoch": 18.99948024948025, "percentage": 95.0, "elapsed_time": "1:27:57", "remaining_time": "0:04:37", "throughput": 1320.32, "total_tokens": 6967928}
7330
+ {"current_steps": 36556, "total_steps": 38480, "eval_loss": 0.1469559222459793, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "1:28:05", "remaining_time": "0:04:38", "throughput": 1318.3, "total_tokens": 6968096}
7331
+ {"current_steps": 36560, "total_steps": 38480, "loss": 0.1782, "lr": 3.7862566783146147e-07, "epoch": 19.002079002079004, "percentage": 95.01, "elapsed_time": "1:28:07", "remaining_time": "0:04:37", "throughput": 1318.02, "total_tokens": 6968832}
7332
+ {"current_steps": 36565, "total_steps": 38480, "loss": 0.0913, "lr": 3.766622085984661e-07, "epoch": 19.004677754677754, "percentage": 95.02, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6969760}
7333
+ {"current_steps": 36570, "total_steps": 38480, "loss": 0.1051, "lr": 3.747038149728266e-07, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:28:08", "remaining_time": "0:04:36", "throughput": 1317.99, "total_tokens": 6970688}