rbelanec commited on
Commit
43d36db
·
verified ·
1 Parent(s): b8b86d0

Training in progress, step 36252

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +383 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb7bdcff8b23a83c4b8f0b96cf78775a4ca68b9452b77260db39d3c7f1b7295
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1d68a1826f35377223a6cd6cad40ef68d92fe5f7c5fad78e594621bfc467a5
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -6888,3 +6888,386 @@
6888
  {"current_steps": 34350, "total_steps": 38160, "loss": 0.4229, "lr": 3.0075523637592474e-05, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:28:03", "remaining_time": "0:09:45", "throughput": 4240.67, "total_tokens": 22403888}
6889
  {"current_steps": 34355, "total_steps": 38160, "loss": 0.4744, "lr": 2.999745597097847e-05, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:28:03", "remaining_time": "0:09:45", "throughput": 4240.8, "total_tokens": 22408208}
6890
  {"current_steps": 34360, "total_steps": 38160, "loss": 0.4145, "lr": 2.9919486623534497e-05, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:28:04", "remaining_time": "0:09:44", "throughput": 4240.86, "total_tokens": 22411856}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6888
  {"current_steps": 34350, "total_steps": 38160, "loss": 0.4229, "lr": 3.0075523637592474e-05, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:28:03", "remaining_time": "0:09:45", "throughput": 4240.67, "total_tokens": 22403888}
6889
  {"current_steps": 34355, "total_steps": 38160, "loss": 0.4744, "lr": 2.999745597097847e-05, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:28:03", "remaining_time": "0:09:45", "throughput": 4240.8, "total_tokens": 22408208}
6890
  {"current_steps": 34360, "total_steps": 38160, "loss": 0.4145, "lr": 2.9919486623534497e-05, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:28:04", "remaining_time": "0:09:44", "throughput": 4240.86, "total_tokens": 22411856}
6891
+ {"current_steps": 34365, "total_steps": 38160, "loss": 0.3884, "lr": 2.9841615611571005e-05, "epoch": 18.011006289308177, "percentage": 90.06, "elapsed_time": "1:28:05", "remaining_time": "0:09:43", "throughput": 4240.88, "total_tokens": 22415248}
6892
+ {"current_steps": 34370, "total_steps": 38160, "loss": 0.4467, "lr": 2.9763842951377628e-05, "epoch": 18.01362683438155, "percentage": 90.07, "elapsed_time": "1:28:06", "remaining_time": "0:09:42", "throughput": 4240.89, "total_tokens": 22418416}
6893
+ {"current_steps": 34375, "total_steps": 38160, "loss": 0.403, "lr": 2.968616865922369e-05, "epoch": 18.016247379454928, "percentage": 90.08, "elapsed_time": "1:28:06", "remaining_time": "0:09:42", "throughput": 4240.91, "total_tokens": 22421456}
6894
+ {"current_steps": 34380, "total_steps": 38160, "loss": 0.2892, "lr": 2.960859275135758e-05, "epoch": 18.0188679245283, "percentage": 90.09, "elapsed_time": "1:28:07", "remaining_time": "0:09:41", "throughput": 4240.92, "total_tokens": 22424464}
6895
+ {"current_steps": 34385, "total_steps": 38160, "loss": 0.3506, "lr": 2.9531115244007367e-05, "epoch": 18.021488469601678, "percentage": 90.11, "elapsed_time": "1:28:08", "remaining_time": "0:09:40", "throughput": 4240.91, "total_tokens": 22427152}
6896
+ {"current_steps": 34390, "total_steps": 38160, "loss": 0.3666, "lr": 2.945373615338037e-05, "epoch": 18.02410901467505, "percentage": 90.12, "elapsed_time": "1:28:09", "remaining_time": "0:09:39", "throughput": 4241.05, "total_tokens": 22431696}
6897
+ {"current_steps": 34395, "total_steps": 38160, "loss": 0.5684, "lr": 2.9376455495663534e-05, "epoch": 18.02672955974843, "percentage": 90.13, "elapsed_time": "1:28:09", "remaining_time": "0:09:39", "throughput": 4241.06, "total_tokens": 22434704}
6898
+ {"current_steps": 34400, "total_steps": 38160, "loss": 0.4663, "lr": 2.9299273287023144e-05, "epoch": 18.029350104821802, "percentage": 90.15, "elapsed_time": "1:28:10", "remaining_time": "0:09:38", "throughput": 4241.08, "total_tokens": 22437776}
6899
+ {"current_steps": 34405, "total_steps": 38160, "loss": 0.3512, "lr": 2.922218954360473e-05, "epoch": 18.03197064989518, "percentage": 90.16, "elapsed_time": "1:28:11", "remaining_time": "0:09:37", "throughput": 4241.1, "total_tokens": 22440912}
6900
+ {"current_steps": 34410, "total_steps": 38160, "loss": 0.4478, "lr": 2.9145204281533278e-05, "epoch": 18.034591194968552, "percentage": 90.17, "elapsed_time": "1:28:12", "remaining_time": "0:09:36", "throughput": 4241.12, "total_tokens": 22444112}
6901
+ {"current_steps": 34415, "total_steps": 38160, "loss": 0.4255, "lr": 2.9068317516913244e-05, "epoch": 18.03721174004193, "percentage": 90.19, "elapsed_time": "1:28:12", "remaining_time": "0:09:35", "throughput": 4241.22, "total_tokens": 22448080}
6902
+ {"current_steps": 34420, "total_steps": 38160, "loss": 0.412, "lr": 2.8991529265828542e-05, "epoch": 18.039832285115303, "percentage": 90.2, "elapsed_time": "1:28:13", "remaining_time": "0:09:35", "throughput": 4241.19, "total_tokens": 22450800}
6903
+ {"current_steps": 34425, "total_steps": 38160, "loss": 0.484, "lr": 2.891483954434243e-05, "epoch": 18.04245283018868, "percentage": 90.21, "elapsed_time": "1:28:14", "remaining_time": "0:09:34", "throughput": 4241.2, "total_tokens": 22453648}
6904
+ {"current_steps": 34430, "total_steps": 38160, "loss": 0.3794, "lr": 2.8838248368497465e-05, "epoch": 18.045073375262053, "percentage": 90.23, "elapsed_time": "1:28:15", "remaining_time": "0:09:33", "throughput": 4241.35, "total_tokens": 22458480}
6905
+ {"current_steps": 34435, "total_steps": 38160, "loss": 0.4342, "lr": 2.8761755754315667e-05, "epoch": 18.04769392033543, "percentage": 90.24, "elapsed_time": "1:28:15", "remaining_time": "0:09:32", "throughput": 4241.39, "total_tokens": 22461840}
6906
+ {"current_steps": 34440, "total_steps": 38160, "loss": 0.2968, "lr": 2.8685361717798286e-05, "epoch": 18.050314465408807, "percentage": 90.25, "elapsed_time": "1:28:16", "remaining_time": "0:09:32", "throughput": 4241.43, "total_tokens": 22465104}
6907
+ {"current_steps": 34445, "total_steps": 38160, "loss": 0.4147, "lr": 2.8609066274926265e-05, "epoch": 18.05293501048218, "percentage": 90.26, "elapsed_time": "1:28:17", "remaining_time": "0:09:31", "throughput": 4241.52, "total_tokens": 22468976}
6908
+ {"current_steps": 34450, "total_steps": 38160, "loss": 0.3654, "lr": 2.8532869441659615e-05, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "1:28:18", "remaining_time": "0:09:30", "throughput": 4241.51, "total_tokens": 22471888}
6909
+ {"current_steps": 34455, "total_steps": 38160, "loss": 0.4113, "lr": 2.8456771233937973e-05, "epoch": 18.05817610062893, "percentage": 90.29, "elapsed_time": "1:28:18", "remaining_time": "0:09:29", "throughput": 4241.51, "total_tokens": 22474640}
6910
+ {"current_steps": 34460, "total_steps": 38160, "loss": 0.5413, "lr": 2.838077166768016e-05, "epoch": 18.060796645702307, "percentage": 90.3, "elapsed_time": "1:28:19", "remaining_time": "0:09:28", "throughput": 4241.47, "total_tokens": 22477168}
6911
+ {"current_steps": 34465, "total_steps": 38160, "loss": 0.5416, "lr": 2.8304870758784296e-05, "epoch": 18.06341719077568, "percentage": 90.32, "elapsed_time": "1:28:20", "remaining_time": "0:09:28", "throughput": 4241.48, "total_tokens": 22480240}
6912
+ {"current_steps": 34470, "total_steps": 38160, "loss": 0.4499, "lr": 2.822906852312812e-05, "epoch": 18.066037735849058, "percentage": 90.33, "elapsed_time": "1:28:20", "remaining_time": "0:09:27", "throughput": 4241.5, "total_tokens": 22483344}
6913
+ {"current_steps": 34475, "total_steps": 38160, "loss": 0.3865, "lr": 2.8153364976568563e-05, "epoch": 18.06865828092243, "percentage": 90.34, "elapsed_time": "1:28:21", "remaining_time": "0:09:26", "throughput": 4241.47, "total_tokens": 22485840}
6914
+ {"current_steps": 34480, "total_steps": 38160, "loss": 0.4024, "lr": 2.8077760134941955e-05, "epoch": 18.071278825995808, "percentage": 90.36, "elapsed_time": "1:28:22", "remaining_time": "0:09:25", "throughput": 4241.53, "total_tokens": 22489392}
6915
+ {"current_steps": 34485, "total_steps": 38160, "loss": 0.3471, "lr": 2.800225401406392e-05, "epoch": 18.07389937106918, "percentage": 90.37, "elapsed_time": "1:28:22", "remaining_time": "0:09:25", "throughput": 4241.47, "total_tokens": 22491696}
6916
+ {"current_steps": 34490, "total_steps": 38160, "loss": 0.4333, "lr": 2.7926846629729607e-05, "epoch": 18.07651991614256, "percentage": 90.38, "elapsed_time": "1:28:23", "remaining_time": "0:09:24", "throughput": 4241.46, "total_tokens": 22494448}
6917
+ {"current_steps": 34495, "total_steps": 38160, "loss": 0.4007, "lr": 2.7851537997713174e-05, "epoch": 18.079140461215932, "percentage": 90.4, "elapsed_time": "1:28:24", "remaining_time": "0:09:23", "throughput": 4241.46, "total_tokens": 22497264}
6918
+ {"current_steps": 34500, "total_steps": 38160, "loss": 0.3384, "lr": 2.7776328133768458e-05, "epoch": 18.08176100628931, "percentage": 90.41, "elapsed_time": "1:28:24", "remaining_time": "0:09:22", "throughput": 4241.46, "total_tokens": 22500080}
6919
+ {"current_steps": 34505, "total_steps": 38160, "loss": 0.4492, "lr": 2.770121705362849e-05, "epoch": 18.084381551362682, "percentage": 90.42, "elapsed_time": "1:28:25", "remaining_time": "0:09:21", "throughput": 4241.48, "total_tokens": 22503152}
6920
+ {"current_steps": 34510, "total_steps": 38160, "loss": 0.4776, "lr": 2.7626204773005704e-05, "epoch": 18.08700209643606, "percentage": 90.44, "elapsed_time": "1:28:26", "remaining_time": "0:09:21", "throughput": 4241.51, "total_tokens": 22506288}
6921
+ {"current_steps": 34515, "total_steps": 38160, "loss": 0.4782, "lr": 2.7551291307591765e-05, "epoch": 18.089622641509433, "percentage": 90.45, "elapsed_time": "1:28:26", "remaining_time": "0:09:20", "throughput": 4241.45, "total_tokens": 22508496}
6922
+ {"current_steps": 34520, "total_steps": 38160, "loss": 0.4567, "lr": 2.7476476673057636e-05, "epoch": 18.09224318658281, "percentage": 90.46, "elapsed_time": "1:28:27", "remaining_time": "0:09:19", "throughput": 4241.55, "total_tokens": 22512496}
6923
+ {"current_steps": 34525, "total_steps": 38160, "loss": 0.4225, "lr": 2.740176088505375e-05, "epoch": 18.094863731656183, "percentage": 90.47, "elapsed_time": "1:28:28", "remaining_time": "0:09:18", "throughput": 4241.6, "total_tokens": 22515888}
6924
+ {"current_steps": 34530, "total_steps": 38160, "loss": 0.3824, "lr": 2.7327143959209765e-05, "epoch": 18.09748427672956, "percentage": 90.49, "elapsed_time": "1:28:29", "remaining_time": "0:09:18", "throughput": 4241.62, "total_tokens": 22518928}
6925
+ {"current_steps": 34535, "total_steps": 38160, "loss": 0.3789, "lr": 2.725262591113481e-05, "epoch": 18.100104821802937, "percentage": 90.5, "elapsed_time": "1:28:29", "remaining_time": "0:09:17", "throughput": 4241.73, "total_tokens": 22523120}
6926
+ {"current_steps": 34540, "total_steps": 38160, "loss": 0.3732, "lr": 2.7178206756417078e-05, "epoch": 18.10272536687631, "percentage": 90.51, "elapsed_time": "1:28:30", "remaining_time": "0:09:16", "throughput": 4241.68, "total_tokens": 22525392}
6927
+ {"current_steps": 34545, "total_steps": 38160, "loss": 0.3853, "lr": 2.7103886510624344e-05, "epoch": 18.105345911949687, "percentage": 90.53, "elapsed_time": "1:28:31", "remaining_time": "0:09:15", "throughput": 4241.65, "total_tokens": 22527920}
6928
+ {"current_steps": 34550, "total_steps": 38160, "loss": 0.4668, "lr": 2.7029665189303387e-05, "epoch": 18.10796645702306, "percentage": 90.54, "elapsed_time": "1:28:31", "remaining_time": "0:09:15", "throughput": 4241.7, "total_tokens": 22531472}
6929
+ {"current_steps": 34555, "total_steps": 38160, "loss": 0.4821, "lr": 2.6955542807980515e-05, "epoch": 18.110587002096437, "percentage": 90.55, "elapsed_time": "1:28:32", "remaining_time": "0:09:14", "throughput": 4241.72, "total_tokens": 22534672}
6930
+ {"current_steps": 34560, "total_steps": 38160, "loss": 0.4549, "lr": 2.688151938216138e-05, "epoch": 18.11320754716981, "percentage": 90.57, "elapsed_time": "1:28:33", "remaining_time": "0:09:13", "throughput": 4241.71, "total_tokens": 22537424}
6931
+ {"current_steps": 34565, "total_steps": 38160, "loss": 0.5083, "lr": 2.6807594927330703e-05, "epoch": 18.115828092243188, "percentage": 90.58, "elapsed_time": "1:28:34", "remaining_time": "0:09:12", "throughput": 4241.77, "total_tokens": 22540944}
6932
+ {"current_steps": 34570, "total_steps": 38160, "loss": 0.3702, "lr": 2.6733769458952727e-05, "epoch": 18.11844863731656, "percentage": 90.59, "elapsed_time": "1:28:34", "remaining_time": "0:09:11", "throughput": 4241.82, "total_tokens": 22544368}
6933
+ {"current_steps": 34575, "total_steps": 38160, "loss": 0.4153, "lr": 2.6660042992470934e-05, "epoch": 18.121069182389938, "percentage": 90.61, "elapsed_time": "1:28:35", "remaining_time": "0:09:11", "throughput": 4241.85, "total_tokens": 22547568}
6934
+ {"current_steps": 34580, "total_steps": 38160, "loss": 0.4046, "lr": 2.658641554330793e-05, "epoch": 18.12368972746331, "percentage": 90.62, "elapsed_time": "1:28:36", "remaining_time": "0:09:10", "throughput": 4241.95, "total_tokens": 22551504}
6935
+ {"current_steps": 34585, "total_steps": 38160, "loss": 0.409, "lr": 2.6512887126865782e-05, "epoch": 18.12631027253669, "percentage": 90.63, "elapsed_time": "1:28:36", "remaining_time": "0:09:09", "throughput": 4241.95, "total_tokens": 22554416}
6936
+ {"current_steps": 34590, "total_steps": 38160, "loss": 0.2697, "lr": 2.6439457758525908e-05, "epoch": 18.128930817610062, "percentage": 90.64, "elapsed_time": "1:28:37", "remaining_time": "0:09:08", "throughput": 4241.95, "total_tokens": 22557232}
6937
+ {"current_steps": 34595, "total_steps": 38160, "loss": 0.3729, "lr": 2.6366127453648748e-05, "epoch": 18.13155136268344, "percentage": 90.66, "elapsed_time": "1:28:38", "remaining_time": "0:09:08", "throughput": 4242.03, "total_tokens": 22560976}
6938
+ {"current_steps": 34600, "total_steps": 38160, "loss": 0.5576, "lr": 2.6292896227574303e-05, "epoch": 18.134171907756812, "percentage": 90.67, "elapsed_time": "1:28:39", "remaining_time": "0:09:07", "throughput": 4242.07, "total_tokens": 22564272}
6939
+ {"current_steps": 34605, "total_steps": 38160, "loss": 0.6959, "lr": 2.6219764095621547e-05, "epoch": 18.13679245283019, "percentage": 90.68, "elapsed_time": "1:28:39", "remaining_time": "0:09:06", "throughput": 4242.08, "total_tokens": 22567280}
6940
+ {"current_steps": 34610, "total_steps": 38160, "loss": 0.4199, "lr": 2.6146731073088958e-05, "epoch": 18.139412997903563, "percentage": 90.7, "elapsed_time": "1:28:40", "remaining_time": "0:09:05", "throughput": 4242.16, "total_tokens": 22571120}
6941
+ {"current_steps": 34615, "total_steps": 38160, "loss": 0.38, "lr": 2.607379717525432e-05, "epoch": 18.14203354297694, "percentage": 90.71, "elapsed_time": "1:28:41", "remaining_time": "0:09:04", "throughput": 4242.21, "total_tokens": 22574672}
6942
+ {"current_steps": 34620, "total_steps": 38160, "loss": 0.4761, "lr": 2.6000962417374373e-05, "epoch": 18.144654088050313, "percentage": 90.72, "elapsed_time": "1:28:42", "remaining_time": "0:09:04", "throughput": 4242.21, "total_tokens": 22577616}
6943
+ {"current_steps": 34625, "total_steps": 38160, "loss": 0.4897, "lr": 2.5928226814685485e-05, "epoch": 18.14727463312369, "percentage": 90.74, "elapsed_time": "1:28:42", "remaining_time": "0:09:03", "throughput": 4242.16, "total_tokens": 22579888}
6944
+ {"current_steps": 34630, "total_steps": 38160, "loss": 0.4687, "lr": 2.585559038240304e-05, "epoch": 18.149895178197063, "percentage": 90.75, "elapsed_time": "1:28:43", "remaining_time": "0:09:02", "throughput": 4242.17, "total_tokens": 22582896}
6945
+ {"current_steps": 34635, "total_steps": 38160, "loss": 0.3422, "lr": 2.5783053135721714e-05, "epoch": 18.15251572327044, "percentage": 90.76, "elapsed_time": "1:28:44", "remaining_time": "0:09:01", "throughput": 4242.15, "total_tokens": 22585616}
6946
+ {"current_steps": 34640, "total_steps": 38160, "loss": 0.5253, "lr": 2.571061508981565e-05, "epoch": 18.155136268343817, "percentage": 90.78, "elapsed_time": "1:28:44", "remaining_time": "0:09:01", "throughput": 4242.14, "total_tokens": 22588496}
6947
+ {"current_steps": 34645, "total_steps": 38160, "loss": 0.4101, "lr": 2.5638276259837778e-05, "epoch": 18.15775681341719, "percentage": 90.79, "elapsed_time": "1:28:45", "remaining_time": "0:09:00", "throughput": 4242.11, "total_tokens": 22590896}
6948
+ {"current_steps": 34650, "total_steps": 38160, "loss": 0.7325, "lr": 2.5566036660920778e-05, "epoch": 18.160377358490567, "percentage": 90.8, "elapsed_time": "1:28:45", "remaining_time": "0:08:59", "throughput": 4242.04, "total_tokens": 22593104}
6949
+ {"current_steps": 34655, "total_steps": 38160, "loss": 0.4124, "lr": 2.5493896308176223e-05, "epoch": 18.16299790356394, "percentage": 90.81, "elapsed_time": "1:28:46", "remaining_time": "0:08:58", "throughput": 4242.1, "total_tokens": 22596656}
6950
+ {"current_steps": 34660, "total_steps": 38160, "loss": 0.4359, "lr": 2.542185521669521e-05, "epoch": 18.165618448637318, "percentage": 90.83, "elapsed_time": "1:28:47", "remaining_time": "0:08:57", "throughput": 4242.08, "total_tokens": 22599312}
6951
+ {"current_steps": 34665, "total_steps": 38160, "loss": 0.3336, "lr": 2.534991340154774e-05, "epoch": 18.16823899371069, "percentage": 90.84, "elapsed_time": "1:28:48", "remaining_time": "0:08:57", "throughput": 4242.18, "total_tokens": 22603440}
6952
+ {"current_steps": 34670, "total_steps": 38160, "loss": 0.4386, "lr": 2.5278070877783332e-05, "epoch": 18.170859538784068, "percentage": 90.85, "elapsed_time": "1:28:48", "remaining_time": "0:08:56", "throughput": 4242.2, "total_tokens": 22606512}
6953
+ {"current_steps": 34675, "total_steps": 38160, "loss": 0.5036, "lr": 2.520632766043052e-05, "epoch": 18.17348008385744, "percentage": 90.87, "elapsed_time": "1:28:49", "remaining_time": "0:08:55", "throughput": 4242.22, "total_tokens": 22609584}
6954
+ {"current_steps": 34680, "total_steps": 38160, "loss": 0.4587, "lr": 2.513468376449729e-05, "epoch": 18.17610062893082, "percentage": 90.88, "elapsed_time": "1:28:50", "remaining_time": "0:08:54", "throughput": 4242.31, "total_tokens": 22613712}
6955
+ {"current_steps": 34685, "total_steps": 38160, "loss": 0.3843, "lr": 2.506313920497061e-05, "epoch": 18.178721174004192, "percentage": 90.89, "elapsed_time": "1:28:51", "remaining_time": "0:08:54", "throughput": 4242.35, "total_tokens": 22617008}
6956
+ {"current_steps": 34690, "total_steps": 38160, "loss": 0.282, "lr": 2.4991693996816888e-05, "epoch": 18.18134171907757, "percentage": 90.91, "elapsed_time": "1:28:51", "remaining_time": "0:08:53", "throughput": 4242.36, "total_tokens": 22620112}
6957
+ {"current_steps": 34695, "total_steps": 38160, "loss": 0.3383, "lr": 2.4920348154981677e-05, "epoch": 18.183962264150942, "percentage": 90.92, "elapsed_time": "1:28:52", "remaining_time": "0:08:52", "throughput": 4242.33, "total_tokens": 22622608}
6958
+ {"current_steps": 34700, "total_steps": 38160, "loss": 0.5241, "lr": 2.4849101694389477e-05, "epoch": 18.18658280922432, "percentage": 90.93, "elapsed_time": "1:28:53", "remaining_time": "0:08:51", "throughput": 4242.4, "total_tokens": 22626416}
6959
+ {"current_steps": 34705, "total_steps": 38160, "loss": 0.326, "lr": 2.4777954629944478e-05, "epoch": 18.189203354297693, "percentage": 90.95, "elapsed_time": "1:28:54", "remaining_time": "0:08:51", "throughput": 4242.42, "total_tokens": 22629328}
6960
+ {"current_steps": 34710, "total_steps": 38160, "loss": 0.5246, "lr": 2.4706906976529718e-05, "epoch": 18.19182389937107, "percentage": 90.96, "elapsed_time": "1:28:54", "remaining_time": "0:08:50", "throughput": 4242.41, "total_tokens": 22632144}
6961
+ {"current_steps": 34715, "total_steps": 38160, "loss": 0.4111, "lr": 2.4635958749007648e-05, "epoch": 18.194444444444443, "percentage": 90.97, "elapsed_time": "1:28:55", "remaining_time": "0:08:49", "throughput": 4242.4, "total_tokens": 22634992}
6962
+ {"current_steps": 34720, "total_steps": 38160, "loss": 0.3829, "lr": 2.456510996221978e-05, "epoch": 18.19706498951782, "percentage": 90.99, "elapsed_time": "1:28:56", "remaining_time": "0:08:48", "throughput": 4242.43, "total_tokens": 22638256}
6963
+ {"current_steps": 34725, "total_steps": 38160, "loss": 0.4402, "lr": 2.4494360630986756e-05, "epoch": 18.199685534591197, "percentage": 91.0, "elapsed_time": "1:28:56", "remaining_time": "0:08:47", "throughput": 4242.47, "total_tokens": 22641712}
6964
+ {"current_steps": 34730, "total_steps": 38160, "loss": 0.4189, "lr": 2.4423710770108687e-05, "epoch": 18.20230607966457, "percentage": 91.01, "elapsed_time": "1:28:57", "remaining_time": "0:08:47", "throughput": 4242.6, "total_tokens": 22645904}
6965
+ {"current_steps": 34735, "total_steps": 38160, "loss": 0.4479, "lr": 2.435316039436464e-05, "epoch": 18.204926624737947, "percentage": 91.02, "elapsed_time": "1:28:58", "remaining_time": "0:08:46", "throughput": 4242.6, "total_tokens": 22648944}
6966
+ {"current_steps": 34740, "total_steps": 38160, "loss": 0.4419, "lr": 2.428270951851297e-05, "epoch": 18.20754716981132, "percentage": 91.04, "elapsed_time": "1:28:59", "remaining_time": "0:08:45", "throughput": 4242.61, "total_tokens": 22651888}
6967
+ {"current_steps": 34745, "total_steps": 38160, "loss": 0.4568, "lr": 2.421235815729128e-05, "epoch": 18.210167714884697, "percentage": 91.05, "elapsed_time": "1:28:59", "remaining_time": "0:08:44", "throughput": 4242.63, "total_tokens": 22655120}
6968
+ {"current_steps": 34750, "total_steps": 38160, "loss": 0.7387, "lr": 2.414210632541619e-05, "epoch": 18.21278825995807, "percentage": 91.06, "elapsed_time": "1:29:00", "remaining_time": "0:08:44", "throughput": 4242.71, "total_tokens": 22658928}
6969
+ {"current_steps": 34755, "total_steps": 38160, "loss": 0.4457, "lr": 2.40719540375835e-05, "epoch": 18.215408805031448, "percentage": 91.08, "elapsed_time": "1:29:01", "remaining_time": "0:08:43", "throughput": 4242.75, "total_tokens": 22662288}
6970
+ {"current_steps": 34760, "total_steps": 38160, "loss": 0.3886, "lr": 2.4001901308468353e-05, "epoch": 18.21802935010482, "percentage": 91.09, "elapsed_time": "1:29:02", "remaining_time": "0:08:42", "throughput": 4242.72, "total_tokens": 22664848}
6971
+ {"current_steps": 34765, "total_steps": 38160, "loss": 0.3537, "lr": 2.3931948152724982e-05, "epoch": 18.220649895178198, "percentage": 91.1, "elapsed_time": "1:29:02", "remaining_time": "0:08:41", "throughput": 4242.68, "total_tokens": 22667376}
6972
+ {"current_steps": 34770, "total_steps": 38160, "loss": 0.4802, "lr": 2.386209458498684e-05, "epoch": 18.22327044025157, "percentage": 91.12, "elapsed_time": "1:29:03", "remaining_time": "0:08:40", "throughput": 4242.78, "total_tokens": 22671440}
6973
+ {"current_steps": 34775, "total_steps": 38160, "loss": 0.4087, "lr": 2.3792340619866458e-05, "epoch": 18.22589098532495, "percentage": 91.13, "elapsed_time": "1:29:04", "remaining_time": "0:08:40", "throughput": 4242.87, "total_tokens": 22675408}
6974
+ {"current_steps": 34780, "total_steps": 38160, "loss": 0.3748, "lr": 2.3722686271955507e-05, "epoch": 18.228511530398322, "percentage": 91.14, "elapsed_time": "1:29:05", "remaining_time": "0:08:39", "throughput": 4242.86, "total_tokens": 22678224}
6975
+ {"current_steps": 34785, "total_steps": 38160, "loss": 0.3309, "lr": 2.365313155582488e-05, "epoch": 18.2311320754717, "percentage": 91.16, "elapsed_time": "1:29:05", "remaining_time": "0:08:38", "throughput": 4242.92, "total_tokens": 22681712}
6976
+ {"current_steps": 34790, "total_steps": 38160, "loss": 0.2974, "lr": 2.358367648602472e-05, "epoch": 18.233752620545072, "percentage": 91.17, "elapsed_time": "1:29:06", "remaining_time": "0:08:37", "throughput": 4242.95, "total_tokens": 22685008}
6977
+ {"current_steps": 34795, "total_steps": 38160, "loss": 0.4638, "lr": 2.3514321077084234e-05, "epoch": 18.23637316561845, "percentage": 91.18, "elapsed_time": "1:29:07", "remaining_time": "0:08:37", "throughput": 4242.99, "total_tokens": 22688368}
6978
+ {"current_steps": 34800, "total_steps": 38160, "loss": 0.4309, "lr": 2.3445065343511763e-05, "epoch": 18.238993710691823, "percentage": 91.19, "elapsed_time": "1:29:07", "remaining_time": "0:08:36", "throughput": 4242.98, "total_tokens": 22691184}
6979
+ {"current_steps": 34805, "total_steps": 38160, "loss": 0.4477, "lr": 2.3375909299794717e-05, "epoch": 18.2416142557652, "percentage": 91.21, "elapsed_time": "1:29:08", "remaining_time": "0:08:35", "throughput": 4243.06, "total_tokens": 22695024}
6980
+ {"current_steps": 34810, "total_steps": 38160, "loss": 0.4103, "lr": 2.330685296039986e-05, "epoch": 18.244234800838573, "percentage": 91.22, "elapsed_time": "1:29:09", "remaining_time": "0:08:34", "throughput": 4243.16, "total_tokens": 22699088}
6981
+ {"current_steps": 34815, "total_steps": 38160, "loss": 0.3671, "lr": 2.3237896339772914e-05, "epoch": 18.24685534591195, "percentage": 91.23, "elapsed_time": "1:29:10", "remaining_time": "0:08:34", "throughput": 4243.21, "total_tokens": 22702480}
6982
+ {"current_steps": 34820, "total_steps": 38160, "loss": 0.2956, "lr": 2.3169039452338892e-05, "epoch": 18.249475890985323, "percentage": 91.25, "elapsed_time": "1:29:11", "remaining_time": "0:08:33", "throughput": 4243.24, "total_tokens": 22705840}
6983
+ {"current_steps": 34825, "total_steps": 38160, "loss": 0.4418, "lr": 2.3100282312501886e-05, "epoch": 18.2520964360587, "percentage": 91.26, "elapsed_time": "1:29:11", "remaining_time": "0:08:32", "throughput": 4243.24, "total_tokens": 22708752}
6984
+ {"current_steps": 34830, "total_steps": 38160, "loss": 0.4557, "lr": 2.3031624934645113e-05, "epoch": 18.254716981132077, "percentage": 91.27, "elapsed_time": "1:29:12", "remaining_time": "0:08:31", "throughput": 4243.21, "total_tokens": 22711504}
6985
+ {"current_steps": 34835, "total_steps": 38160, "loss": 0.6007, "lr": 2.296306733313075e-05, "epoch": 18.25733752620545, "percentage": 91.29, "elapsed_time": "1:29:13", "remaining_time": "0:08:30", "throughput": 4243.22, "total_tokens": 22714384}
6986
+ {"current_steps": 34840, "total_steps": 38160, "loss": 0.447, "lr": 2.289460952230038e-05, "epoch": 18.259958071278827, "percentage": 91.3, "elapsed_time": "1:29:13", "remaining_time": "0:08:30", "throughput": 4243.3, "total_tokens": 22718192}
6987
+ {"current_steps": 34845, "total_steps": 38160, "loss": 0.4613, "lr": 2.2826251516474604e-05, "epoch": 18.2625786163522, "percentage": 91.31, "elapsed_time": "1:29:14", "remaining_time": "0:08:29", "throughput": 4243.34, "total_tokens": 22721616}
6988
+ {"current_steps": 34850, "total_steps": 38160, "loss": 0.5123, "lr": 2.2757993329953152e-05, "epoch": 18.265199161425578, "percentage": 91.33, "elapsed_time": "1:29:15", "remaining_time": "0:08:28", "throughput": 4243.35, "total_tokens": 22724624}
6989
+ {"current_steps": 34855, "total_steps": 38160, "loss": 0.4834, "lr": 2.2689834977014822e-05, "epoch": 18.26781970649895, "percentage": 91.34, "elapsed_time": "1:29:16", "remaining_time": "0:08:27", "throughput": 4243.35, "total_tokens": 22727600}
6990
+ {"current_steps": 34860, "total_steps": 38160, "loss": 0.2946, "lr": 2.2621776471917598e-05, "epoch": 18.270440251572328, "percentage": 91.35, "elapsed_time": "1:29:16", "remaining_time": "0:08:27", "throughput": 4243.37, "total_tokens": 22730608}
6991
+ {"current_steps": 34865, "total_steps": 38160, "loss": 0.4198, "lr": 2.255381782889848e-05, "epoch": 18.2730607966457, "percentage": 91.37, "elapsed_time": "1:29:17", "remaining_time": "0:08:26", "throughput": 4243.37, "total_tokens": 22733424}
6992
+ {"current_steps": 34870, "total_steps": 38160, "loss": 0.397, "lr": 2.2485959062173654e-05, "epoch": 18.27568134171908, "percentage": 91.38, "elapsed_time": "1:29:18", "remaining_time": "0:08:25", "throughput": 4243.43, "total_tokens": 22737008}
6993
+ {"current_steps": 34875, "total_steps": 38160, "loss": 0.4014, "lr": 2.2418200185938485e-05, "epoch": 18.278301886792452, "percentage": 91.39, "elapsed_time": "1:29:18", "remaining_time": "0:08:24", "throughput": 4243.46, "total_tokens": 22740272}
6994
+ {"current_steps": 34880, "total_steps": 38160, "loss": 0.3965, "lr": 2.235054121436725e-05, "epoch": 18.28092243186583, "percentage": 91.4, "elapsed_time": "1:29:19", "remaining_time": "0:08:24", "throughput": 4243.57, "total_tokens": 22744464}
6995
+ {"current_steps": 34885, "total_steps": 38160, "loss": 0.4315, "lr": 2.2282982161613562e-05, "epoch": 18.283542976939202, "percentage": 91.42, "elapsed_time": "1:29:20", "remaining_time": "0:08:23", "throughput": 4243.55, "total_tokens": 22747152}
6996
+ {"current_steps": 34890, "total_steps": 38160, "loss": 0.4046, "lr": 2.221552304180985e-05, "epoch": 18.28616352201258, "percentage": 91.43, "elapsed_time": "1:29:21", "remaining_time": "0:08:22", "throughput": 4243.67, "total_tokens": 22751408}
6997
+ {"current_steps": 34895, "total_steps": 38160, "loss": 0.4814, "lr": 2.2148163869067816e-05, "epoch": 18.288784067085953, "percentage": 91.44, "elapsed_time": "1:29:22", "remaining_time": "0:08:21", "throughput": 4243.79, "total_tokens": 22755568}
6998
+ {"current_steps": 34900, "total_steps": 38160, "loss": 0.3868, "lr": 2.2080904657478305e-05, "epoch": 18.29140461215933, "percentage": 91.46, "elapsed_time": "1:29:22", "remaining_time": "0:08:20", "throughput": 4243.8, "total_tokens": 22758576}
6999
+ {"current_steps": 34905, "total_steps": 38160, "loss": 0.4292, "lr": 2.201374542111123e-05, "epoch": 18.294025157232703, "percentage": 91.47, "elapsed_time": "1:29:23", "remaining_time": "0:08:20", "throughput": 4243.87, "total_tokens": 22762128}
7000
+ {"current_steps": 34910, "total_steps": 38160, "loss": 0.4068, "lr": 2.1946686174015407e-05, "epoch": 18.29664570230608, "percentage": 91.48, "elapsed_time": "1:29:24", "remaining_time": "0:08:19", "throughput": 4243.85, "total_tokens": 22764784}
7001
+ {"current_steps": 34915, "total_steps": 38160, "loss": 0.426, "lr": 2.1879726930218946e-05, "epoch": 18.299266247379457, "percentage": 91.5, "elapsed_time": "1:29:24", "remaining_time": "0:08:18", "throughput": 4243.91, "total_tokens": 22768304}
7002
+ {"current_steps": 34920, "total_steps": 38160, "loss": 0.3971, "lr": 2.1812867703728866e-05, "epoch": 18.30188679245283, "percentage": 91.51, "elapsed_time": "1:29:25", "remaining_time": "0:08:17", "throughput": 4243.95, "total_tokens": 22771632}
7003
+ {"current_steps": 34925, "total_steps": 38160, "loss": 0.3411, "lr": 2.1746108508531426e-05, "epoch": 18.304507337526207, "percentage": 91.52, "elapsed_time": "1:29:26", "remaining_time": "0:08:17", "throughput": 4244.0, "total_tokens": 22774928}
7004
+ {"current_steps": 34930, "total_steps": 38160, "loss": 0.3557, "lr": 2.1679449358591952e-05, "epoch": 18.30712788259958, "percentage": 91.54, "elapsed_time": "1:29:27", "remaining_time": "0:08:16", "throughput": 4244.07, "total_tokens": 22778640}
7005
+ {"current_steps": 34935, "total_steps": 38160, "loss": 0.3278, "lr": 2.1612890267854624e-05, "epoch": 18.309748427672957, "percentage": 91.55, "elapsed_time": "1:29:27", "remaining_time": "0:08:15", "throughput": 4244.1, "total_tokens": 22781936}
7006
+ {"current_steps": 34940, "total_steps": 38160, "loss": 0.3724, "lr": 2.1546431250242914e-05, "epoch": 18.31236897274633, "percentage": 91.56, "elapsed_time": "1:29:28", "remaining_time": "0:08:14", "throughput": 4244.07, "total_tokens": 22784528}
7007
+ {"current_steps": 34945, "total_steps": 38160, "loss": 0.3077, "lr": 2.1480072319659427e-05, "epoch": 18.314989517819708, "percentage": 91.57, "elapsed_time": "1:29:29", "remaining_time": "0:08:14", "throughput": 4244.32, "total_tokens": 22790704}
7008
+ {"current_steps": 34950, "total_steps": 38160, "loss": 0.4542, "lr": 2.1413813489985447e-05, "epoch": 18.31761006289308, "percentage": 91.59, "elapsed_time": "1:29:30", "remaining_time": "0:08:13", "throughput": 4244.38, "total_tokens": 22794320}
7009
+ {"current_steps": 34955, "total_steps": 38160, "loss": 0.4753, "lr": 2.1347654775081716e-05, "epoch": 18.320230607966458, "percentage": 91.6, "elapsed_time": "1:29:31", "remaining_time": "0:08:12", "throughput": 4244.49, "total_tokens": 22798544}
7010
+ {"current_steps": 34960, "total_steps": 38160, "loss": 0.3972, "lr": 2.1281596188787834e-05, "epoch": 18.32285115303983, "percentage": 91.61, "elapsed_time": "1:29:31", "remaining_time": "0:08:11", "throughput": 4244.49, "total_tokens": 22801392}
7011
+ {"current_steps": 34965, "total_steps": 38160, "loss": 0.4025, "lr": 2.121563774492252e-05, "epoch": 18.32547169811321, "percentage": 91.63, "elapsed_time": "1:29:32", "remaining_time": "0:08:10", "throughput": 4244.54, "total_tokens": 22804880}
7012
+ {"current_steps": 34970, "total_steps": 38160, "loss": 0.608, "lr": 2.114977945728358e-05, "epoch": 18.328092243186582, "percentage": 91.64, "elapsed_time": "1:29:33", "remaining_time": "0:08:10", "throughput": 4244.57, "total_tokens": 22808112}
7013
+ {"current_steps": 34975, "total_steps": 38160, "loss": 0.4494, "lr": 2.1084021339647707e-05, "epoch": 18.33071278825996, "percentage": 91.65, "elapsed_time": "1:29:34", "remaining_time": "0:08:09", "throughput": 4244.85, "total_tokens": 22814544}
7014
+ {"current_steps": 34980, "total_steps": 38160, "loss": 0.3492, "lr": 2.1018363405770792e-05, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "1:29:35", "remaining_time": "0:08:08", "throughput": 4244.84, "total_tokens": 22817200}
7015
+ {"current_steps": 34985, "total_steps": 38160, "loss": 0.3357, "lr": 2.095280566938784e-05, "epoch": 18.33595387840671, "percentage": 91.68, "elapsed_time": "1:29:35", "remaining_time": "0:08:07", "throughput": 4244.84, "total_tokens": 22820048}
7016
+ {"current_steps": 34990, "total_steps": 38160, "loss": 0.3754, "lr": 2.0887348144212615e-05, "epoch": 18.338574423480082, "percentage": 91.69, "elapsed_time": "1:29:36", "remaining_time": "0:08:07", "throughput": 4244.87, "total_tokens": 22823280}
7017
+ {"current_steps": 34995, "total_steps": 38160, "loss": 0.4762, "lr": 2.08219908439381e-05, "epoch": 18.34119496855346, "percentage": 91.71, "elapsed_time": "1:29:37", "remaining_time": "0:08:06", "throughput": 4244.88, "total_tokens": 22826224}
7018
+ {"current_steps": 35000, "total_steps": 38160, "loss": 0.3158, "lr": 2.075673378223647e-05, "epoch": 18.343815513626833, "percentage": 91.72, "elapsed_time": "1:29:38", "remaining_time": "0:08:05", "throughput": 4244.9, "total_tokens": 22829360}
7019
+ {"current_steps": 35005, "total_steps": 38160, "loss": 0.3997, "lr": 2.069157697275853e-05, "epoch": 18.34643605870021, "percentage": 91.73, "elapsed_time": "1:29:38", "remaining_time": "0:08:04", "throughput": 4244.93, "total_tokens": 22832688}
7020
+ {"current_steps": 35010, "total_steps": 38160, "loss": 0.3555, "lr": 2.0626520429134543e-05, "epoch": 18.349056603773583, "percentage": 91.75, "elapsed_time": "1:29:39", "remaining_time": "0:08:04", "throughput": 4244.98, "total_tokens": 22836176}
7021
+ {"current_steps": 35015, "total_steps": 38160, "loss": 0.3097, "lr": 2.0561564164973458e-05, "epoch": 18.35167714884696, "percentage": 91.76, "elapsed_time": "1:29:40", "remaining_time": "0:08:03", "throughput": 4244.99, "total_tokens": 22839088}
7022
+ {"current_steps": 35020, "total_steps": 38160, "loss": 0.4021, "lr": 2.04967081938634e-05, "epoch": 18.354297693920337, "percentage": 91.77, "elapsed_time": "1:29:40", "remaining_time": "0:08:02", "throughput": 4245.03, "total_tokens": 22842384}
7023
+ {"current_steps": 35025, "total_steps": 38160, "loss": 0.4408, "lr": 2.043195252937152e-05, "epoch": 18.35691823899371, "percentage": 91.78, "elapsed_time": "1:29:41", "remaining_time": "0:08:01", "throughput": 4245.02, "total_tokens": 22845232}
7024
+ {"current_steps": 35030, "total_steps": 38160, "loss": 0.4412, "lr": 2.0367297185044043e-05, "epoch": 18.359538784067087, "percentage": 91.8, "elapsed_time": "1:29:42", "remaining_time": "0:08:00", "throughput": 4245.01, "total_tokens": 22848016}
7025
+ {"current_steps": 35035, "total_steps": 38160, "loss": 0.5107, "lr": 2.030274217440603e-05, "epoch": 18.36215932914046, "percentage": 91.81, "elapsed_time": "1:29:42", "remaining_time": "0:08:00", "throughput": 4244.99, "total_tokens": 22850672}
7026
+ {"current_steps": 35040, "total_steps": 38160, "loss": 0.2978, "lr": 2.0238287510961628e-05, "epoch": 18.364779874213838, "percentage": 91.82, "elapsed_time": "1:29:43", "remaining_time": "0:07:59", "throughput": 4244.99, "total_tokens": 22853584}
7027
+ {"current_steps": 35045, "total_steps": 38160, "loss": 0.3747, "lr": 2.017393320819405e-05, "epoch": 18.36740041928721, "percentage": 91.84, "elapsed_time": "1:29:44", "remaining_time": "0:07:58", "throughput": 4245.06, "total_tokens": 22857424}
7028
+ {"current_steps": 35050, "total_steps": 38160, "loss": 0.3845, "lr": 2.010967927956553e-05, "epoch": 18.370020964360588, "percentage": 91.85, "elapsed_time": "1:29:45", "remaining_time": "0:07:57", "throughput": 4245.17, "total_tokens": 22861456}
7029
+ {"current_steps": 35055, "total_steps": 38160, "loss": 0.3705, "lr": 2.00455257385172e-05, "epoch": 18.37264150943396, "percentage": 91.86, "elapsed_time": "1:29:46", "remaining_time": "0:07:57", "throughput": 4245.21, "total_tokens": 22864752}
7030
+ {"current_steps": 35060, "total_steps": 38160, "loss": 0.3471, "lr": 1.9981472598469386e-05, "epoch": 18.37526205450734, "percentage": 91.88, "elapsed_time": "1:29:46", "remaining_time": "0:07:56", "throughput": 4245.3, "total_tokens": 22868624}
7031
+ {"current_steps": 35065, "total_steps": 38160, "loss": 0.4729, "lr": 1.9917519872821142e-05, "epoch": 18.377882599580712, "percentage": 91.89, "elapsed_time": "1:29:47", "remaining_time": "0:07:55", "throughput": 4245.33, "total_tokens": 22871760}
7032
+ {"current_steps": 35070, "total_steps": 38160, "loss": 0.3713, "lr": 1.9853667574950605e-05, "epoch": 18.38050314465409, "percentage": 91.9, "elapsed_time": "1:29:48", "remaining_time": "0:07:54", "throughput": 4245.38, "total_tokens": 22875248}
7033
+ {"current_steps": 35075, "total_steps": 38160, "loss": 0.4612, "lr": 1.978991571821498e-05, "epoch": 18.383123689727462, "percentage": 91.92, "elapsed_time": "1:29:49", "remaining_time": "0:07:53", "throughput": 4245.42, "total_tokens": 22878640}
7034
+ {"current_steps": 35080, "total_steps": 38160, "loss": 0.377, "lr": 1.972626431595048e-05, "epoch": 18.38574423480084, "percentage": 91.93, "elapsed_time": "1:29:49", "remaining_time": "0:07:53", "throughput": 4245.43, "total_tokens": 22881584}
7035
+ {"current_steps": 35085, "total_steps": 38160, "loss": 0.3947, "lr": 1.9662713381472295e-05, "epoch": 18.388364779874212, "percentage": 91.94, "elapsed_time": "1:29:50", "remaining_time": "0:07:52", "throughput": 4245.42, "total_tokens": 22884368}
7036
+ {"current_steps": 35090, "total_steps": 38160, "loss": 0.509, "lr": 1.959926292807451e-05, "epoch": 18.39098532494759, "percentage": 91.95, "elapsed_time": "1:29:50", "remaining_time": "0:07:51", "throughput": 4245.4, "total_tokens": 22886896}
7037
+ {"current_steps": 35095, "total_steps": 38160, "loss": 0.4735, "lr": 1.9535912969030178e-05, "epoch": 18.393605870020963, "percentage": 91.97, "elapsed_time": "1:29:51", "remaining_time": "0:07:50", "throughput": 4245.46, "total_tokens": 22890448}
7038
+ {"current_steps": 35100, "total_steps": 38160, "loss": 0.4035, "lr": 1.947266351759136e-05, "epoch": 18.39622641509434, "percentage": 91.98, "elapsed_time": "1:29:52", "remaining_time": "0:07:50", "throughput": 4245.53, "total_tokens": 22894224}
7039
+ {"current_steps": 35105, "total_steps": 38160, "loss": 0.5141, "lr": 1.940951458698925e-05, "epoch": 18.398846960167713, "percentage": 91.99, "elapsed_time": "1:29:53", "remaining_time": "0:07:49", "throughput": 4245.57, "total_tokens": 22897680}
7040
+ {"current_steps": 35110, "total_steps": 38160, "loss": 0.4072, "lr": 1.9346466190433842e-05, "epoch": 18.40146750524109, "percentage": 92.01, "elapsed_time": "1:29:54", "remaining_time": "0:07:48", "throughput": 4245.6, "total_tokens": 22900880}
7041
+ {"current_steps": 35115, "total_steps": 38160, "loss": 0.4257, "lr": 1.9283518341114136e-05, "epoch": 18.404088050314467, "percentage": 92.02, "elapsed_time": "1:29:54", "remaining_time": "0:07:47", "throughput": 4245.62, "total_tokens": 22903984}
7042
+ {"current_steps": 35120, "total_steps": 38160, "loss": 0.5028, "lr": 1.9220671052198047e-05, "epoch": 18.40670859538784, "percentage": 92.03, "elapsed_time": "1:29:55", "remaining_time": "0:07:47", "throughput": 4245.66, "total_tokens": 22907216}
7043
+ {"current_steps": 35125, "total_steps": 38160, "loss": 0.5816, "lr": 1.9157924336832556e-05, "epoch": 18.409329140461217, "percentage": 92.05, "elapsed_time": "1:29:56", "remaining_time": "0:07:46", "throughput": 4245.68, "total_tokens": 22910352}
7044
+ {"current_steps": 35130, "total_steps": 38160, "loss": 0.6077, "lr": 1.909527820814355e-05, "epoch": 18.41194968553459, "percentage": 92.06, "elapsed_time": "1:29:56", "remaining_time": "0:07:45", "throughput": 4245.73, "total_tokens": 22913776}
7045
+ {"current_steps": 35135, "total_steps": 38160, "loss": 0.5534, "lr": 1.9032732679235886e-05, "epoch": 18.414570230607968, "percentage": 92.07, "elapsed_time": "1:29:57", "remaining_time": "0:07:44", "throughput": 4245.82, "total_tokens": 22917584}
7046
+ {"current_steps": 35140, "total_steps": 38160, "loss": 0.4127, "lr": 1.8970287763193428e-05, "epoch": 18.41719077568134, "percentage": 92.09, "elapsed_time": "1:29:58", "remaining_time": "0:07:43", "throughput": 4245.84, "total_tokens": 22920592}
7047
+ {"current_steps": 35145, "total_steps": 38160, "loss": 0.4117, "lr": 1.8907943473078892e-05, "epoch": 18.419811320754718, "percentage": 92.1, "elapsed_time": "1:29:59", "remaining_time": "0:07:43", "throughput": 4245.82, "total_tokens": 22923248}
7048
+ {"current_steps": 35150, "total_steps": 38160, "loss": 0.3661, "lr": 1.884569982193396e-05, "epoch": 18.42243186582809, "percentage": 92.11, "elapsed_time": "1:29:59", "remaining_time": "0:07:42", "throughput": 4245.89, "total_tokens": 22926832}
7049
+ {"current_steps": 35155, "total_steps": 38160, "loss": 0.4296, "lr": 1.8783556822779267e-05, "epoch": 18.42505241090147, "percentage": 92.13, "elapsed_time": "1:30:00", "remaining_time": "0:07:41", "throughput": 4245.94, "total_tokens": 22930256}
7050
+ {"current_steps": 35160, "total_steps": 38160, "loss": 0.5727, "lr": 1.8721514488614532e-05, "epoch": 18.427672955974842, "percentage": 92.14, "elapsed_time": "1:30:01", "remaining_time": "0:07:40", "throughput": 4245.98, "total_tokens": 22933552}
7051
+ {"current_steps": 35165, "total_steps": 38160, "loss": 0.3944, "lr": 1.8659572832418315e-05, "epoch": 18.43029350104822, "percentage": 92.15, "elapsed_time": "1:30:01", "remaining_time": "0:07:40", "throughput": 4245.95, "total_tokens": 22936112}
7052
+ {"current_steps": 35170, "total_steps": 38160, "loss": 0.5773, "lr": 1.8597731867148026e-05, "epoch": 18.432914046121592, "percentage": 92.16, "elapsed_time": "1:30:02", "remaining_time": "0:07:39", "throughput": 4245.97, "total_tokens": 22939376}
7053
+ {"current_steps": 35175, "total_steps": 38160, "loss": 0.3923, "lr": 1.8535991605740043e-05, "epoch": 18.43553459119497, "percentage": 92.18, "elapsed_time": "1:30:03", "remaining_time": "0:07:38", "throughput": 4245.98, "total_tokens": 22942416}
7054
+ {"current_steps": 35180, "total_steps": 38160, "loss": 0.4708, "lr": 1.8474352061109757e-05, "epoch": 18.438155136268342, "percentage": 92.19, "elapsed_time": "1:30:04", "remaining_time": "0:07:37", "throughput": 4246.0, "total_tokens": 22945520}
7055
+ {"current_steps": 35185, "total_steps": 38160, "loss": 0.4677, "lr": 1.8412813246151515e-05, "epoch": 18.44077568134172, "percentage": 92.2, "elapsed_time": "1:30:04", "remaining_time": "0:07:36", "throughput": 4245.99, "total_tokens": 22948240}
7056
+ {"current_steps": 35190, "total_steps": 38160, "loss": 0.4811, "lr": 1.8351375173738584e-05, "epoch": 18.443396226415093, "percentage": 92.22, "elapsed_time": "1:30:05", "remaining_time": "0:07:36", "throughput": 4246.02, "total_tokens": 22951536}
7057
+ {"current_steps": 35195, "total_steps": 38160, "loss": 0.4009, "lr": 1.829003785672295e-05, "epoch": 18.44601677148847, "percentage": 92.23, "elapsed_time": "1:30:06", "remaining_time": "0:07:35", "throughput": 4246.1, "total_tokens": 22955280}
7058
+ {"current_steps": 35200, "total_steps": 38160, "loss": 0.4267, "lr": 1.8228801307935806e-05, "epoch": 18.448637316561843, "percentage": 92.24, "elapsed_time": "1:30:06", "remaining_time": "0:07:34", "throughput": 4246.13, "total_tokens": 22958576}
7059
+ {"current_steps": 35205, "total_steps": 38160, "loss": 0.4438, "lr": 1.8167665540187063e-05, "epoch": 18.45125786163522, "percentage": 92.26, "elapsed_time": "1:30:07", "remaining_time": "0:07:33", "throughput": 4246.12, "total_tokens": 22961328}
7060
+ {"current_steps": 35210, "total_steps": 38160, "loss": 0.3014, "lr": 1.8106630566265604e-05, "epoch": 18.453878406708597, "percentage": 92.27, "elapsed_time": "1:30:08", "remaining_time": "0:07:33", "throughput": 4246.18, "total_tokens": 22964880}
7061
+ {"current_steps": 35215, "total_steps": 38160, "loss": 0.405, "lr": 1.8045696398939326e-05, "epoch": 18.45649895178197, "percentage": 92.28, "elapsed_time": "1:30:09", "remaining_time": "0:07:32", "throughput": 4246.27, "total_tokens": 22968848}
7062
+ {"current_steps": 35220, "total_steps": 38160, "loss": 0.5101, "lr": 1.7984863050955036e-05, "epoch": 18.459119496855347, "percentage": 92.3, "elapsed_time": "1:30:10", "remaining_time": "0:07:31", "throughput": 4246.38, "total_tokens": 22972944}
7063
+ {"current_steps": 35225, "total_steps": 38160, "loss": 0.4701, "lr": 1.7924130535038162e-05, "epoch": 18.46174004192872, "percentage": 92.31, "elapsed_time": "1:30:10", "remaining_time": "0:07:30", "throughput": 4246.42, "total_tokens": 22976208}
7064
+ {"current_steps": 35230, "total_steps": 38160, "loss": 0.4033, "lr": 1.7863498863893433e-05, "epoch": 18.464360587002098, "percentage": 92.32, "elapsed_time": "1:30:11", "remaining_time": "0:07:30", "throughput": 4246.41, "total_tokens": 22978832}
7065
+ {"current_steps": 35235, "total_steps": 38160, "loss": 0.5057, "lr": 1.7802968050204203e-05, "epoch": 18.46698113207547, "percentage": 92.33, "elapsed_time": "1:30:12", "remaining_time": "0:07:29", "throughput": 4246.43, "total_tokens": 22982096}
7066
+ {"current_steps": 35240, "total_steps": 38160, "loss": 0.5859, "lr": 1.7742538106632844e-05, "epoch": 18.469601677148848, "percentage": 92.35, "elapsed_time": "1:30:12", "remaining_time": "0:07:28", "throughput": 4246.52, "total_tokens": 22986032}
7067
+ {"current_steps": 35245, "total_steps": 38160, "loss": 0.3565, "lr": 1.7682209045820684e-05, "epoch": 18.47222222222222, "percentage": 92.36, "elapsed_time": "1:30:13", "remaining_time": "0:07:27", "throughput": 4246.53, "total_tokens": 22989168}
7068
+ {"current_steps": 35250, "total_steps": 38160, "loss": 0.4738, "lr": 1.76219808803878e-05, "epoch": 18.4748427672956, "percentage": 92.37, "elapsed_time": "1:30:14", "remaining_time": "0:07:26", "throughput": 4246.6, "total_tokens": 22992784}
7069
+ {"current_steps": 35255, "total_steps": 38160, "loss": 0.3464, "lr": 1.7561853622933278e-05, "epoch": 18.47746331236897, "percentage": 92.39, "elapsed_time": "1:30:15", "remaining_time": "0:07:26", "throughput": 4246.62, "total_tokens": 22995824}
7070
+ {"current_steps": 35260, "total_steps": 38160, "loss": 0.4793, "lr": 1.7501827286035e-05, "epoch": 18.48008385744235, "percentage": 92.4, "elapsed_time": "1:30:15", "remaining_time": "0:07:25", "throughput": 4246.64, "total_tokens": 22998928}
7071
+ {"current_steps": 35265, "total_steps": 38160, "loss": 0.3338, "lr": 1.7441901882249754e-05, "epoch": 18.482704402515722, "percentage": 92.41, "elapsed_time": "1:30:16", "remaining_time": "0:07:24", "throughput": 4246.61, "total_tokens": 23001424}
7072
+ {"current_steps": 35270, "total_steps": 38160, "loss": 0.3866, "lr": 1.7382077424113464e-05, "epoch": 18.4853249475891, "percentage": 92.43, "elapsed_time": "1:30:17", "remaining_time": "0:07:23", "throughput": 4246.65, "total_tokens": 23004784}
7073
+ {"current_steps": 35275, "total_steps": 38160, "loss": 0.3582, "lr": 1.7322353924140498e-05, "epoch": 18.487945492662472, "percentage": 92.44, "elapsed_time": "1:30:17", "remaining_time": "0:07:23", "throughput": 4246.68, "total_tokens": 23008048}
7074
+ {"current_steps": 35280, "total_steps": 38160, "loss": 0.2952, "lr": 1.7262731394824372e-05, "epoch": 18.49056603773585, "percentage": 92.45, "elapsed_time": "1:30:18", "remaining_time": "0:07:22", "throughput": 4246.63, "total_tokens": 23010416}
7075
+ {"current_steps": 35285, "total_steps": 38160, "loss": 0.4329, "lr": 1.7203209848637603e-05, "epoch": 18.493186582809223, "percentage": 92.47, "elapsed_time": "1:30:19", "remaining_time": "0:07:21", "throughput": 4246.66, "total_tokens": 23013616}
7076
+ {"current_steps": 35290, "total_steps": 38160, "loss": 0.4018, "lr": 1.7143789298031175e-05, "epoch": 18.4958071278826, "percentage": 92.48, "elapsed_time": "1:30:20", "remaining_time": "0:07:20", "throughput": 4246.77, "total_tokens": 23017808}
7077
+ {"current_steps": 35295, "total_steps": 38160, "loss": 0.3369, "lr": 1.708446975543537e-05, "epoch": 18.498427672955973, "percentage": 92.49, "elapsed_time": "1:30:20", "remaining_time": "0:07:20", "throughput": 4246.78, "total_tokens": 23020720}
7078
+ {"current_steps": 35300, "total_steps": 38160, "loss": 0.3027, "lr": 1.7025251233259098e-05, "epoch": 18.50104821802935, "percentage": 92.51, "elapsed_time": "1:30:21", "remaining_time": "0:07:19", "throughput": 4246.86, "total_tokens": 23024560}
7079
+ {"current_steps": 35305, "total_steps": 38160, "loss": 0.4093, "lr": 1.6966133743890166e-05, "epoch": 18.503668763102727, "percentage": 92.52, "elapsed_time": "1:30:22", "remaining_time": "0:07:18", "throughput": 4246.9, "total_tokens": 23027856}
7080
+ {"current_steps": 35310, "total_steps": 38160, "loss": 0.3726, "lr": 1.690711729969535e-05, "epoch": 18.5062893081761, "percentage": 92.53, "elapsed_time": "1:30:22", "remaining_time": "0:07:17", "throughput": 4246.92, "total_tokens": 23031024}
7081
+ {"current_steps": 35315, "total_steps": 38160, "loss": 0.3525, "lr": 1.684820191302022e-05, "epoch": 18.508909853249477, "percentage": 92.54, "elapsed_time": "1:30:23", "remaining_time": "0:07:16", "throughput": 4246.94, "total_tokens": 23034192}
7082
+ {"current_steps": 35320, "total_steps": 38160, "loss": 0.4685, "lr": 1.6789387596189087e-05, "epoch": 18.51153039832285, "percentage": 92.56, "elapsed_time": "1:30:24", "remaining_time": "0:07:16", "throughput": 4246.99, "total_tokens": 23037680}
7083
+ {"current_steps": 35325, "total_steps": 38160, "loss": 0.4078, "lr": 1.6730674361505382e-05, "epoch": 18.514150943396228, "percentage": 92.57, "elapsed_time": "1:30:25", "remaining_time": "0:07:15", "throughput": 4247.01, "total_tokens": 23040784}
7084
+ {"current_steps": 35330, "total_steps": 38160, "loss": 0.3935, "lr": 1.6672062221251117e-05, "epoch": 18.5167714884696, "percentage": 92.58, "elapsed_time": "1:30:25", "remaining_time": "0:07:14", "throughput": 4247.03, "total_tokens": 23043888}
7085
+ {"current_steps": 35335, "total_steps": 38160, "loss": 0.4532, "lr": 1.6613551187687314e-05, "epoch": 18.519392033542978, "percentage": 92.6, "elapsed_time": "1:30:26", "remaining_time": "0:07:13", "throughput": 4247.04, "total_tokens": 23046928}
7086
+ {"current_steps": 35340, "total_steps": 38160, "loss": 0.34, "lr": 1.6555141273053907e-05, "epoch": 18.52201257861635, "percentage": 92.61, "elapsed_time": "1:30:27", "remaining_time": "0:07:13", "throughput": 4247.23, "total_tokens": 23052016}
7087
+ {"current_steps": 35345, "total_steps": 38160, "loss": 0.4066, "lr": 1.6496832489569457e-05, "epoch": 18.52463312368973, "percentage": 92.62, "elapsed_time": "1:30:28", "remaining_time": "0:07:12", "throughput": 4247.29, "total_tokens": 23055632}
7088
+ {"current_steps": 35350, "total_steps": 38160, "loss": 0.4199, "lr": 1.643862484943165e-05, "epoch": 18.5272536687631, "percentage": 92.64, "elapsed_time": "1:30:29", "remaining_time": "0:07:11", "throughput": 4247.3, "total_tokens": 23058640}
7089
+ {"current_steps": 35355, "total_steps": 38160, "loss": 0.4618, "lr": 1.6380518364816687e-05, "epoch": 18.52987421383648, "percentage": 92.65, "elapsed_time": "1:30:29", "remaining_time": "0:07:10", "throughput": 4247.3, "total_tokens": 23061488}
7090
+ {"current_steps": 35360, "total_steps": 38160, "loss": 0.3365, "lr": 1.632251304787985e-05, "epoch": 18.532494758909852, "percentage": 92.66, "elapsed_time": "1:30:30", "remaining_time": "0:07:10", "throughput": 4247.27, "total_tokens": 23064016}
7091
+ {"current_steps": 35365, "total_steps": 38160, "loss": 0.3891, "lr": 1.6264608910755153e-05, "epoch": 18.53511530398323, "percentage": 92.68, "elapsed_time": "1:30:30", "remaining_time": "0:07:09", "throughput": 4247.24, "total_tokens": 23066576}
7092
+ {"current_steps": 35370, "total_steps": 38160, "loss": 0.4386, "lr": 1.6206805965555627e-05, "epoch": 18.537735849056602, "percentage": 92.69, "elapsed_time": "1:30:31", "remaining_time": "0:07:08", "throughput": 4247.29, "total_tokens": 23069936}
7093
+ {"current_steps": 35375, "total_steps": 38160, "loss": 0.7166, "lr": 1.614910422437288e-05, "epoch": 18.54035639412998, "percentage": 92.7, "elapsed_time": "1:30:32", "remaining_time": "0:07:07", "throughput": 4247.32, "total_tokens": 23073392}
7094
+ {"current_steps": 35380, "total_steps": 38160, "loss": 0.4112, "lr": 1.6091503699277477e-05, "epoch": 18.542976939203353, "percentage": 92.71, "elapsed_time": "1:30:33", "remaining_time": "0:07:06", "throughput": 4247.56, "total_tokens": 23079568}
7095
+ {"current_steps": 35385, "total_steps": 38160, "loss": 0.329, "lr": 1.6034004402318726e-05, "epoch": 18.54559748427673, "percentage": 92.73, "elapsed_time": "1:30:34", "remaining_time": "0:07:06", "throughput": 4247.56, "total_tokens": 23082480}
7096
+ {"current_steps": 35390, "total_steps": 38160, "loss": 0.3841, "lr": 1.5976606345524836e-05, "epoch": 18.548218029350103, "percentage": 92.74, "elapsed_time": "1:30:34", "remaining_time": "0:07:05", "throughput": 4247.55, "total_tokens": 23085328}
7097
+ {"current_steps": 35395, "total_steps": 38160, "loss": 0.447, "lr": 1.5919309540902927e-05, "epoch": 18.55083857442348, "percentage": 92.75, "elapsed_time": "1:30:35", "remaining_time": "0:07:04", "throughput": 4247.54, "total_tokens": 23088208}
7098
+ {"current_steps": 35400, "total_steps": 38160, "loss": 0.3579, "lr": 1.5862114000438797e-05, "epoch": 18.553459119496857, "percentage": 92.77, "elapsed_time": "1:30:36", "remaining_time": "0:07:03", "throughput": 4247.57, "total_tokens": 23091376}
7099
+ {"current_steps": 35405, "total_steps": 38160, "loss": 0.4901, "lr": 1.5805019736097104e-05, "epoch": 18.55607966457023, "percentage": 92.78, "elapsed_time": "1:30:37", "remaining_time": "0:07:03", "throughput": 4247.62, "total_tokens": 23094832}
7100
+ {"current_steps": 35410, "total_steps": 38160, "loss": 0.347, "lr": 1.5748026759821232e-05, "epoch": 18.558700209643607, "percentage": 92.79, "elapsed_time": "1:30:37", "remaining_time": "0:07:02", "throughput": 4247.62, "total_tokens": 23097808}
7101
+ {"current_steps": 35415, "total_steps": 38160, "loss": 0.5002, "lr": 1.5691135083533537e-05, "epoch": 18.56132075471698, "percentage": 92.81, "elapsed_time": "1:30:38", "remaining_time": "0:07:01", "throughput": 4247.59, "total_tokens": 23100464}
7102
+ {"current_steps": 35420, "total_steps": 38160, "loss": 0.3637, "lr": 1.5634344719135052e-05, "epoch": 18.563941299790358, "percentage": 92.82, "elapsed_time": "1:30:39", "remaining_time": "0:07:00", "throughput": 4247.63, "total_tokens": 23103760}
7103
+ {"current_steps": 35425, "total_steps": 38160, "loss": 0.4174, "lr": 1.5577655678505776e-05, "epoch": 18.56656184486373, "percentage": 92.83, "elapsed_time": "1:30:39", "remaining_time": "0:06:59", "throughput": 4247.65, "total_tokens": 23106832}
7104
+ {"current_steps": 35430, "total_steps": 38160, "loss": 0.4753, "lr": 1.5521067973504442e-05, "epoch": 18.569182389937108, "percentage": 92.85, "elapsed_time": "1:30:40", "remaining_time": "0:06:59", "throughput": 4247.74, "total_tokens": 23110832}
7105
+ {"current_steps": 35435, "total_steps": 38160, "loss": 0.3034, "lr": 1.546458161596831e-05, "epoch": 18.57180293501048, "percentage": 92.86, "elapsed_time": "1:30:41", "remaining_time": "0:06:58", "throughput": 4247.77, "total_tokens": 23113968}
7106
+ {"current_steps": 35440, "total_steps": 38160, "loss": 0.4055, "lr": 1.5408196617713866e-05, "epoch": 18.57442348008386, "percentage": 92.87, "elapsed_time": "1:30:42", "remaining_time": "0:06:57", "throughput": 4247.77, "total_tokens": 23116944}
7107
+ {"current_steps": 35445, "total_steps": 38160, "loss": 0.4864, "lr": 1.5351912990536175e-05, "epoch": 18.57704402515723, "percentage": 92.89, "elapsed_time": "1:30:42", "remaining_time": "0:06:56", "throughput": 4247.81, "total_tokens": 23120112}
7108
+ {"current_steps": 35450, "total_steps": 38160, "loss": 0.4781, "lr": 1.5295730746209103e-05, "epoch": 18.57966457023061, "percentage": 92.9, "elapsed_time": "1:30:43", "remaining_time": "0:06:56", "throughput": 4247.9, "total_tokens": 23124176}
7109
+ {"current_steps": 35455, "total_steps": 38160, "loss": 0.378, "lr": 1.5239649896485463e-05, "epoch": 18.582285115303982, "percentage": 92.91, "elapsed_time": "1:30:44", "remaining_time": "0:06:55", "throughput": 4247.88, "total_tokens": 23126832}
7110
+ {"current_steps": 35460, "total_steps": 38160, "loss": 0.4183, "lr": 1.5183670453096598e-05, "epoch": 18.58490566037736, "percentage": 92.92, "elapsed_time": "1:30:45", "remaining_time": "0:06:54", "throughput": 4247.9, "total_tokens": 23129904}
7111
+ {"current_steps": 35465, "total_steps": 38160, "loss": 0.4015, "lr": 1.5127792427752696e-05, "epoch": 18.587526205450732, "percentage": 92.94, "elapsed_time": "1:30:45", "remaining_time": "0:06:53", "throughput": 4247.93, "total_tokens": 23133200}
7112
+ {"current_steps": 35470, "total_steps": 38160, "loss": 0.3552, "lr": 1.507201583214296e-05, "epoch": 18.59014675052411, "percentage": 92.95, "elapsed_time": "1:30:46", "remaining_time": "0:06:53", "throughput": 4247.96, "total_tokens": 23136496}
7113
+ {"current_steps": 35475, "total_steps": 38160, "loss": 0.349, "lr": 1.5016340677935169e-05, "epoch": 18.592767295597483, "percentage": 92.96, "elapsed_time": "1:30:47", "remaining_time": "0:06:52", "throughput": 4247.98, "total_tokens": 23139472}
7114
+ {"current_steps": 35480, "total_steps": 38160, "loss": 0.3955, "lr": 1.4960766976775953e-05, "epoch": 18.59538784067086, "percentage": 92.98, "elapsed_time": "1:30:47", "remaining_time": "0:06:51", "throughput": 4247.97, "total_tokens": 23142384}
7115
+ {"current_steps": 35485, "total_steps": 38160, "loss": 0.4373, "lr": 1.4905294740290677e-05, "epoch": 18.598008385744233, "percentage": 92.99, "elapsed_time": "1:30:48", "remaining_time": "0:06:50", "throughput": 4247.98, "total_tokens": 23145328}
7116
+ {"current_steps": 35490, "total_steps": 38160, "loss": 0.4296, "lr": 1.4849923980083391e-05, "epoch": 18.60062893081761, "percentage": 93.0, "elapsed_time": "1:30:49", "remaining_time": "0:06:49", "throughput": 4248.03, "total_tokens": 23148912}
7117
+ {"current_steps": 35495, "total_steps": 38160, "loss": 0.4428, "lr": 1.4794654707737164e-05, "epoch": 18.603249475890987, "percentage": 93.02, "elapsed_time": "1:30:49", "remaining_time": "0:06:49", "throughput": 4248.02, "total_tokens": 23151632}
7118
+ {"current_steps": 35500, "total_steps": 38160, "loss": 0.3877, "lr": 1.4739486934813696e-05, "epoch": 18.60587002096436, "percentage": 93.03, "elapsed_time": "1:30:50", "remaining_time": "0:06:48", "throughput": 4247.98, "total_tokens": 23154032}
7119
+ {"current_steps": 35505, "total_steps": 38160, "loss": 0.4348, "lr": 1.468442067285336e-05, "epoch": 18.608490566037737, "percentage": 93.04, "elapsed_time": "1:30:51", "remaining_time": "0:06:47", "throughput": 4247.98, "total_tokens": 23156976}
7120
+ {"current_steps": 35510, "total_steps": 38160, "loss": 0.4224, "lr": 1.4629455933375501e-05, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "1:30:51", "remaining_time": "0:06:46", "throughput": 4247.95, "total_tokens": 23159472}
7121
+ {"current_steps": 35515, "total_steps": 38160, "loss": 0.5039, "lr": 1.4574592727878089e-05, "epoch": 18.613731656184488, "percentage": 93.07, "elapsed_time": "1:30:52", "remaining_time": "0:06:46", "throughput": 4247.96, "total_tokens": 23162672}
7122
+ {"current_steps": 35520, "total_steps": 38160, "loss": 0.4714, "lr": 1.4519831067837774e-05, "epoch": 18.61635220125786, "percentage": 93.08, "elapsed_time": "1:30:53", "remaining_time": "0:06:45", "throughput": 4247.96, "total_tokens": 23165456}
7123
+ {"current_steps": 35525, "total_steps": 38160, "loss": 0.4144, "lr": 1.4465170964710172e-05, "epoch": 18.618972746331238, "percentage": 93.09, "elapsed_time": "1:30:54", "remaining_time": "0:06:44", "throughput": 4247.98, "total_tokens": 23168656}
7124
+ {"current_steps": 35530, "total_steps": 38160, "loss": 0.4298, "lr": 1.441061242992947e-05, "epoch": 18.62159329140461, "percentage": 93.11, "elapsed_time": "1:30:54", "remaining_time": "0:06:43", "throughput": 4248.0, "total_tokens": 23171856}
7125
+ {"current_steps": 35535, "total_steps": 38160, "loss": 0.3604, "lr": 1.4356155474908871e-05, "epoch": 18.62421383647799, "percentage": 93.12, "elapsed_time": "1:30:55", "remaining_time": "0:06:43", "throughput": 4248.1, "total_tokens": 23175856}
7126
+ {"current_steps": 35540, "total_steps": 38160, "loss": 0.4788, "lr": 1.4301800111039986e-05, "epoch": 18.62683438155136, "percentage": 93.13, "elapsed_time": "1:30:56", "remaining_time": "0:06:42", "throughput": 4248.11, "total_tokens": 23178928}
7127
+ {"current_steps": 35545, "total_steps": 38160, "loss": 0.472, "lr": 1.4247546349693386e-05, "epoch": 18.62945492662474, "percentage": 93.15, "elapsed_time": "1:30:57", "remaining_time": "0:06:41", "throughput": 4248.16, "total_tokens": 23182384}
7128
+ {"current_steps": 35550, "total_steps": 38160, "loss": 0.2986, "lr": 1.4193394202218268e-05, "epoch": 18.632075471698112, "percentage": 93.16, "elapsed_time": "1:30:57", "remaining_time": "0:06:40", "throughput": 4248.24, "total_tokens": 23186192}
7129
+ {"current_steps": 35555, "total_steps": 38160, "loss": 0.4596, "lr": 1.413934367994274e-05, "epoch": 18.63469601677149, "percentage": 93.17, "elapsed_time": "1:30:58", "remaining_time": "0:06:39", "throughput": 4248.41, "total_tokens": 23191056}
7130
+ {"current_steps": 35560, "total_steps": 38160, "loss": 0.4185, "lr": 1.408539479417359e-05, "epoch": 18.637316561844862, "percentage": 93.19, "elapsed_time": "1:30:59", "remaining_time": "0:06:39", "throughput": 4248.43, "total_tokens": 23194160}
7131
+ {"current_steps": 35565, "total_steps": 38160, "loss": 0.3927, "lr": 1.4031547556196178e-05, "epoch": 18.63993710691824, "percentage": 93.2, "elapsed_time": "1:31:00", "remaining_time": "0:06:38", "throughput": 4248.47, "total_tokens": 23197392}
7132
+ {"current_steps": 35570, "total_steps": 38160, "loss": 0.3783, "lr": 1.3977801977274828e-05, "epoch": 18.642557651991613, "percentage": 93.21, "elapsed_time": "1:31:00", "remaining_time": "0:06:37", "throughput": 4248.53, "total_tokens": 23201008}
7133
+ {"current_steps": 35575, "total_steps": 38160, "loss": 0.3483, "lr": 1.3924158068652437e-05, "epoch": 18.64517819706499, "percentage": 93.23, "elapsed_time": "1:31:02", "remaining_time": "0:06:36", "throughput": 4248.84, "total_tokens": 23208176}
7134
+ {"current_steps": 35580, "total_steps": 38160, "loss": 0.4237, "lr": 1.3870615841550693e-05, "epoch": 18.647798742138363, "percentage": 93.24, "elapsed_time": "1:31:02", "remaining_time": "0:06:36", "throughput": 4248.81, "total_tokens": 23210768}
7135
+ {"current_steps": 35585, "total_steps": 38160, "loss": 0.655, "lr": 1.3817175307170138e-05, "epoch": 18.65041928721174, "percentage": 93.25, "elapsed_time": "1:31:03", "remaining_time": "0:06:35", "throughput": 4248.88, "total_tokens": 23214480}
7136
+ {"current_steps": 35590, "total_steps": 38160, "loss": 0.38, "lr": 1.3763836476689828e-05, "epoch": 18.653039832285117, "percentage": 93.27, "elapsed_time": "1:31:04", "remaining_time": "0:06:34", "throughput": 4248.88, "total_tokens": 23217296}
7137
+ {"current_steps": 35595, "total_steps": 38160, "loss": 0.3501, "lr": 1.3710599361267617e-05, "epoch": 18.65566037735849, "percentage": 93.28, "elapsed_time": "1:31:05", "remaining_time": "0:06:33", "throughput": 4248.89, "total_tokens": 23220272}
7138
+ {"current_steps": 35600, "total_steps": 38160, "loss": 0.3514, "lr": 1.3657463972040207e-05, "epoch": 18.658280922431867, "percentage": 93.29, "elapsed_time": "1:31:05", "remaining_time": "0:06:33", "throughput": 4249.05, "total_tokens": 23224912}
7139
+ {"current_steps": 35605, "total_steps": 38160, "loss": 0.3482, "lr": 1.3604430320122762e-05, "epoch": 18.66090146750524, "percentage": 93.3, "elapsed_time": "1:31:06", "remaining_time": "0:06:32", "throughput": 4249.03, "total_tokens": 23227600}
7140
+ {"current_steps": 35610, "total_steps": 38160, "loss": 0.5449, "lr": 1.355149841660941e-05, "epoch": 18.663522012578618, "percentage": 93.32, "elapsed_time": "1:31:07", "remaining_time": "0:06:31", "throughput": 4249.04, "total_tokens": 23230576}
7141
+ {"current_steps": 35615, "total_steps": 38160, "loss": 0.5222, "lr": 1.3498668272572955e-05, "epoch": 18.66614255765199, "percentage": 93.33, "elapsed_time": "1:31:07", "remaining_time": "0:06:30", "throughput": 4249.03, "total_tokens": 23233360}
7142
+ {"current_steps": 35620, "total_steps": 38160, "loss": 0.6085, "lr": 1.3445939899064729e-05, "epoch": 18.668763102725368, "percentage": 93.34, "elapsed_time": "1:31:08", "remaining_time": "0:06:29", "throughput": 4249.13, "total_tokens": 23237328}
7143
+ {"current_steps": 35625, "total_steps": 38160, "loss": 0.3555, "lr": 1.3393313307115019e-05, "epoch": 18.67138364779874, "percentage": 93.36, "elapsed_time": "1:31:09", "remaining_time": "0:06:29", "throughput": 4249.16, "total_tokens": 23240592}
7144
+ {"current_steps": 35630, "total_steps": 38160, "loss": 0.4175, "lr": 1.3340788507732626e-05, "epoch": 18.67400419287212, "percentage": 93.37, "elapsed_time": "1:31:10", "remaining_time": "0:06:28", "throughput": 4249.14, "total_tokens": 23243472}
7145
+ {"current_steps": 35635, "total_steps": 38160, "loss": 0.3939, "lr": 1.328836551190521e-05, "epoch": 18.67662473794549, "percentage": 93.38, "elapsed_time": "1:31:10", "remaining_time": "0:06:27", "throughput": 4249.11, "total_tokens": 23246032}
7146
+ {"current_steps": 35640, "total_steps": 38160, "loss": 0.3871, "lr": 1.323604433059905e-05, "epoch": 18.67924528301887, "percentage": 93.4, "elapsed_time": "1:31:11", "remaining_time": "0:06:26", "throughput": 4249.05, "total_tokens": 23248304}
7147
+ {"current_steps": 35645, "total_steps": 38160, "loss": 0.4204, "lr": 1.3183824974759063e-05, "epoch": 18.681865828092242, "percentage": 93.41, "elapsed_time": "1:31:12", "remaining_time": "0:06:26", "throughput": 4249.08, "total_tokens": 23251632}
7148
+ {"current_steps": 35650, "total_steps": 38160, "loss": 0.3709, "lr": 1.3131707455309006e-05, "epoch": 18.68448637316562, "percentage": 93.42, "elapsed_time": "1:31:12", "remaining_time": "0:06:25", "throughput": 4249.15, "total_tokens": 23255216}
7149
+ {"current_steps": 35655, "total_steps": 38160, "loss": 0.5033, "lr": 1.307969178315127e-05, "epoch": 18.687106918238992, "percentage": 93.44, "elapsed_time": "1:31:13", "remaining_time": "0:06:24", "throughput": 4249.21, "total_tokens": 23258736}
7150
+ {"current_steps": 35660, "total_steps": 38160, "loss": 0.3184, "lr": 1.3027777969166932e-05, "epoch": 18.68972746331237, "percentage": 93.45, "elapsed_time": "1:31:14", "remaining_time": "0:06:23", "throughput": 4249.24, "total_tokens": 23261872}
7151
+ {"current_steps": 35665, "total_steps": 38160, "loss": 0.5256, "lr": 1.2975966024215746e-05, "epoch": 18.692348008385743, "percentage": 93.46, "elapsed_time": "1:31:15", "remaining_time": "0:06:23", "throughput": 4249.27, "total_tokens": 23265008}
7152
+ {"current_steps": 35670, "total_steps": 38160, "loss": 0.3927, "lr": 1.2924255959136267e-05, "epoch": 18.69496855345912, "percentage": 93.47, "elapsed_time": "1:31:15", "remaining_time": "0:06:22", "throughput": 4249.35, "total_tokens": 23268912}
7153
+ {"current_steps": 35675, "total_steps": 38160, "loss": 0.2467, "lr": 1.2872647784745561e-05, "epoch": 18.697589098532493, "percentage": 93.49, "elapsed_time": "1:31:16", "remaining_time": "0:06:21", "throughput": 4249.32, "total_tokens": 23271344}
7154
+ {"current_steps": 35680, "total_steps": 38160, "loss": 0.4069, "lr": 1.282114151183944e-05, "epoch": 18.70020964360587, "percentage": 93.5, "elapsed_time": "1:31:17", "remaining_time": "0:06:20", "throughput": 4249.39, "total_tokens": 23274960}
7155
+ {"current_steps": 35685, "total_steps": 38160, "loss": 0.4416, "lr": 1.2769737151192562e-05, "epoch": 18.702830188679247, "percentage": 93.51, "elapsed_time": "1:31:17", "remaining_time": "0:06:19", "throughput": 4249.42, "total_tokens": 23278160}
7156
+ {"current_steps": 35690, "total_steps": 38160, "loss": 0.4615, "lr": 1.2718434713558047e-05, "epoch": 18.70545073375262, "percentage": 93.53, "elapsed_time": "1:31:18", "remaining_time": "0:06:19", "throughput": 4249.5, "total_tokens": 23282192}
7157
+ {"current_steps": 35695, "total_steps": 38160, "loss": 0.3885, "lr": 1.2667234209667755e-05, "epoch": 18.708071278825997, "percentage": 93.54, "elapsed_time": "1:31:19", "remaining_time": "0:06:18", "throughput": 4249.56, "total_tokens": 23285648}
7158
+ {"current_steps": 35700, "total_steps": 38160, "loss": 0.6108, "lr": 1.2616135650232286e-05, "epoch": 18.71069182389937, "percentage": 93.55, "elapsed_time": "1:31:20", "remaining_time": "0:06:17", "throughput": 4249.54, "total_tokens": 23288304}
7159
+ {"current_steps": 35705, "total_steps": 38160, "loss": 0.384, "lr": 1.2565139045940866e-05, "epoch": 18.713312368972748, "percentage": 93.57, "elapsed_time": "1:31:20", "remaining_time": "0:06:16", "throughput": 4249.56, "total_tokens": 23291408}
7160
+ {"current_steps": 35710, "total_steps": 38160, "loss": 0.4041, "lr": 1.2514244407461462e-05, "epoch": 18.71593291404612, "percentage": 93.58, "elapsed_time": "1:31:21", "remaining_time": "0:06:16", "throughput": 4249.57, "total_tokens": 23294448}
7161
+ {"current_steps": 35715, "total_steps": 38160, "loss": 0.4618, "lr": 1.2463451745440501e-05, "epoch": 18.718553459119498, "percentage": 93.59, "elapsed_time": "1:31:22", "remaining_time": "0:06:15", "throughput": 4249.57, "total_tokens": 23297232}
7162
+ {"current_steps": 35720, "total_steps": 38160, "loss": 0.4629, "lr": 1.241276107050343e-05, "epoch": 18.72117400419287, "percentage": 93.61, "elapsed_time": "1:31:23", "remaining_time": "0:06:14", "throughput": 4249.67, "total_tokens": 23301264}
7163
+ {"current_steps": 35725, "total_steps": 38160, "loss": 0.4489, "lr": 1.236217239325399e-05, "epoch": 18.72379454926625, "percentage": 93.62, "elapsed_time": "1:31:23", "remaining_time": "0:06:13", "throughput": 4249.72, "total_tokens": 23304720}
7164
+ {"current_steps": 35730, "total_steps": 38160, "loss": 0.4643, "lr": 1.2311685724274768e-05, "epoch": 18.72641509433962, "percentage": 93.63, "elapsed_time": "1:31:24", "remaining_time": "0:06:13", "throughput": 4249.79, "total_tokens": 23308592}
7165
+ {"current_steps": 35735, "total_steps": 38160, "loss": 0.8562, "lr": 1.2261301074127096e-05, "epoch": 18.729035639413, "percentage": 93.65, "elapsed_time": "1:31:25", "remaining_time": "0:06:12", "throughput": 4249.85, "total_tokens": 23312080}
7166
+ {"current_steps": 35740, "total_steps": 38160, "loss": 0.6608, "lr": 1.2211018453350874e-05, "epoch": 18.731656184486372, "percentage": 93.66, "elapsed_time": "1:31:26", "remaining_time": "0:06:11", "throughput": 4249.83, "total_tokens": 23314800}
7167
+ {"current_steps": 35745, "total_steps": 38160, "loss": 0.5304, "lr": 1.2160837872464581e-05, "epoch": 18.73427672955975, "percentage": 93.67, "elapsed_time": "1:31:26", "remaining_time": "0:06:10", "throughput": 4249.84, "total_tokens": 23317840}
7168
+ {"current_steps": 35750, "total_steps": 38160, "loss": 0.4351, "lr": 1.2110759341965428e-05, "epoch": 18.736897274633122, "percentage": 93.68, "elapsed_time": "1:31:27", "remaining_time": "0:06:09", "throughput": 4249.89, "total_tokens": 23321200}
7169
+ {"current_steps": 35755, "total_steps": 38160, "loss": 0.5064, "lr": 1.2060782872329256e-05, "epoch": 18.7395178197065, "percentage": 93.7, "elapsed_time": "1:31:28", "remaining_time": "0:06:09", "throughput": 4249.86, "total_tokens": 23323824}
7170
+ {"current_steps": 35760, "total_steps": 38160, "loss": 0.4459, "lr": 1.2010908474010595e-05, "epoch": 18.742138364779873, "percentage": 93.71, "elapsed_time": "1:31:28", "remaining_time": "0:06:08", "throughput": 4249.88, "total_tokens": 23326928}
7171
+ {"current_steps": 35765, "total_steps": 38160, "loss": 0.5196, "lr": 1.1961136157442654e-05, "epoch": 18.74475890985325, "percentage": 93.72, "elapsed_time": "1:31:29", "remaining_time": "0:06:07", "throughput": 4249.95, "total_tokens": 23330576}
7172
+ {"current_steps": 35770, "total_steps": 38160, "loss": 0.4839, "lr": 1.1911465933037214e-05, "epoch": 18.747379454926623, "percentage": 93.74, "elapsed_time": "1:31:30", "remaining_time": "0:06:06", "throughput": 4250.06, "total_tokens": 23334832}
7173
+ {"current_steps": 35775, "total_steps": 38160, "loss": 0.3525, "lr": 1.1861897811184686e-05, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "1:31:31", "remaining_time": "0:06:06", "throughput": 4250.04, "total_tokens": 23337488}
7174
+ {"current_steps": 35780, "total_steps": 38160, "loss": 0.3354, "lr": 1.1812431802254109e-05, "epoch": 18.752620545073377, "percentage": 93.76, "elapsed_time": "1:31:31", "remaining_time": "0:06:05", "throughput": 4250.12, "total_tokens": 23341296}
7175
+ {"current_steps": 35785, "total_steps": 38160, "loss": 0.4981, "lr": 1.1763067916593262e-05, "epoch": 18.75524109014675, "percentage": 93.78, "elapsed_time": "1:31:33", "remaining_time": "0:06:04", "throughput": 4250.4, "total_tokens": 23348144}
7176
+ {"current_steps": 35790, "total_steps": 38160, "loss": 0.3961, "lr": 1.1713806164528496e-05, "epoch": 18.757861635220127, "percentage": 93.79, "elapsed_time": "1:31:33", "remaining_time": "0:06:03", "throughput": 4250.48, "total_tokens": 23352080}
7177
+ {"current_steps": 35795, "total_steps": 38160, "loss": 0.2556, "lr": 1.1664646556364844e-05, "epoch": 18.7604821802935, "percentage": 93.8, "elapsed_time": "1:31:34", "remaining_time": "0:06:03", "throughput": 4250.52, "total_tokens": 23355536}
7178
+ {"current_steps": 35800, "total_steps": 38160, "loss": 0.3476, "lr": 1.161558910238597e-05, "epoch": 18.763102725366878, "percentage": 93.82, "elapsed_time": "1:31:35", "remaining_time": "0:06:02", "throughput": 4250.5, "total_tokens": 23358160}
7179
+ {"current_steps": 35805, "total_steps": 38160, "loss": 0.5274, "lr": 1.1566633812854e-05, "epoch": 18.76572327044025, "percentage": 93.83, "elapsed_time": "1:31:36", "remaining_time": "0:06:01", "throughput": 4250.53, "total_tokens": 23361296}
7180
+ {"current_steps": 35810, "total_steps": 38160, "loss": 0.4236, "lr": 1.151778069800985e-05, "epoch": 18.768343815513628, "percentage": 93.84, "elapsed_time": "1:31:36", "remaining_time": "0:06:00", "throughput": 4250.62, "total_tokens": 23365264}
7181
+ {"current_steps": 35815, "total_steps": 38160, "loss": 0.3163, "lr": 1.1469029768073125e-05, "epoch": 18.770964360587, "percentage": 93.85, "elapsed_time": "1:31:37", "remaining_time": "0:05:59", "throughput": 4250.63, "total_tokens": 23368208}
7182
+ {"current_steps": 35820, "total_steps": 38160, "loss": 0.4608, "lr": 1.1420381033241889e-05, "epoch": 18.77358490566038, "percentage": 93.87, "elapsed_time": "1:31:38", "remaining_time": "0:05:59", "throughput": 4250.63, "total_tokens": 23371248}
7183
+ {"current_steps": 35825, "total_steps": 38160, "loss": 0.3427, "lr": 1.1371834503693002e-05, "epoch": 18.77620545073375, "percentage": 93.88, "elapsed_time": "1:31:39", "remaining_time": "0:05:58", "throughput": 4250.71, "total_tokens": 23374960}
7184
+ {"current_steps": 35830, "total_steps": 38160, "loss": 0.3504, "lr": 1.1323390189581784e-05, "epoch": 18.77882599580713, "percentage": 93.89, "elapsed_time": "1:31:39", "remaining_time": "0:05:57", "throughput": 4250.78, "total_tokens": 23378832}
7185
+ {"current_steps": 35835, "total_steps": 38160, "loss": 0.514, "lr": 1.127504810104213e-05, "epoch": 18.781446540880502, "percentage": 93.91, "elapsed_time": "1:31:40", "remaining_time": "0:05:56", "throughput": 4250.81, "total_tokens": 23382032}
7186
+ {"current_steps": 35840, "total_steps": 38160, "loss": 0.3865, "lr": 1.1226808248186782e-05, "epoch": 18.78406708595388, "percentage": 93.92, "elapsed_time": "1:31:41", "remaining_time": "0:05:56", "throughput": 4250.85, "total_tokens": 23385456}
7187
+ {"current_steps": 35845, "total_steps": 38160, "loss": 0.4484, "lr": 1.1178670641106891e-05, "epoch": 18.786687631027252, "percentage": 93.93, "elapsed_time": "1:31:42", "remaining_time": "0:05:55", "throughput": 4250.87, "total_tokens": 23388592}
7188
+ {"current_steps": 35850, "total_steps": 38160, "loss": 0.4082, "lr": 1.1130635289872403e-05, "epoch": 18.78930817610063, "percentage": 93.95, "elapsed_time": "1:31:42", "remaining_time": "0:05:54", "throughput": 4250.9, "total_tokens": 23391824}
7189
+ {"current_steps": 35855, "total_steps": 38160, "loss": 0.4023, "lr": 1.1082702204531725e-05, "epoch": 18.791928721174003, "percentage": 93.96, "elapsed_time": "1:31:43", "remaining_time": "0:05:53", "throughput": 4250.93, "total_tokens": 23394896}
7190
+ {"current_steps": 35860, "total_steps": 38160, "loss": 0.3763, "lr": 1.1034871395111778e-05, "epoch": 18.79454926624738, "percentage": 93.97, "elapsed_time": "1:31:44", "remaining_time": "0:05:53", "throughput": 4251.09, "total_tokens": 23399888}
7191
+ {"current_steps": 35865, "total_steps": 38160, "loss": 0.3961, "lr": 1.0987142871618394e-05, "epoch": 18.797169811320753, "percentage": 93.99, "elapsed_time": "1:31:45", "remaining_time": "0:05:52", "throughput": 4251.1, "total_tokens": 23402832}
7192
+ {"current_steps": 35870, "total_steps": 38160, "loss": 0.5302, "lr": 1.0939516644035696e-05, "epoch": 18.79979035639413, "percentage": 94.0, "elapsed_time": "1:31:45", "remaining_time": "0:05:51", "throughput": 4251.12, "total_tokens": 23405968}
7193
+ {"current_steps": 35875, "total_steps": 38160, "loss": 0.5617, "lr": 1.0891992722326716e-05, "epoch": 18.802410901467507, "percentage": 94.01, "elapsed_time": "1:31:46", "remaining_time": "0:05:50", "throughput": 4251.11, "total_tokens": 23408656}
7194
+ {"current_steps": 35880, "total_steps": 38160, "loss": 0.59, "lr": 1.0844571116432778e-05, "epoch": 18.80503144654088, "percentage": 94.03, "elapsed_time": "1:31:47", "remaining_time": "0:05:49", "throughput": 4251.14, "total_tokens": 23411952}
7195
+ {"current_steps": 35885, "total_steps": 38160, "loss": 0.4504, "lr": 1.0797251836274003e-05, "epoch": 18.807651991614257, "percentage": 94.04, "elapsed_time": "1:31:47", "remaining_time": "0:05:49", "throughput": 4251.15, "total_tokens": 23414896}
7196
+ {"current_steps": 35890, "total_steps": 38160, "loss": 0.5086, "lr": 1.0750034891748972e-05, "epoch": 18.81027253668763, "percentage": 94.05, "elapsed_time": "1:31:48", "remaining_time": "0:05:48", "throughput": 4251.18, "total_tokens": 23418224}
7197
+ {"current_steps": 35895, "total_steps": 38160, "loss": 0.3793, "lr": 1.0702920292735009e-05, "epoch": 18.812893081761008, "percentage": 94.06, "elapsed_time": "1:31:49", "remaining_time": "0:05:47", "throughput": 4251.18, "total_tokens": 23421008}
7198
+ {"current_steps": 35900, "total_steps": 38160, "loss": 0.3846, "lr": 1.0655908049087893e-05, "epoch": 18.81551362683438, "percentage": 94.08, "elapsed_time": "1:31:49", "remaining_time": "0:05:46", "throughput": 4251.17, "total_tokens": 23423792}
7199
+ {"current_steps": 35905, "total_steps": 38160, "loss": 0.4634, "lr": 1.0608998170642149e-05, "epoch": 18.818134171907758, "percentage": 94.09, "elapsed_time": "1:31:50", "remaining_time": "0:05:46", "throughput": 4251.14, "total_tokens": 23426320}
7200
+ {"current_steps": 35910, "total_steps": 38160, "loss": 0.4991, "lr": 1.0562190667210703e-05, "epoch": 18.82075471698113, "percentage": 94.1, "elapsed_time": "1:31:51", "remaining_time": "0:05:45", "throughput": 4251.19, "total_tokens": 23429840}
7201
+ {"current_steps": 35915, "total_steps": 38160, "loss": 0.3364, "lr": 1.0515485548585113e-05, "epoch": 18.82337526205451, "percentage": 94.12, "elapsed_time": "1:31:52", "remaining_time": "0:05:44", "throughput": 4251.17, "total_tokens": 23432720}
7202
+ {"current_steps": 35920, "total_steps": 38160, "loss": 0.3472, "lr": 1.0468882824535676e-05, "epoch": 18.82599580712788, "percentage": 94.13, "elapsed_time": "1:31:52", "remaining_time": "0:05:43", "throughput": 4251.25, "total_tokens": 23436592}
7203
+ {"current_steps": 35925, "total_steps": 38160, "loss": 0.3523, "lr": 1.0422382504811034e-05, "epoch": 18.82861635220126, "percentage": 94.14, "elapsed_time": "1:31:53", "remaining_time": "0:05:43", "throughput": 4251.29, "total_tokens": 23439856}
7204
+ {"current_steps": 35930, "total_steps": 38160, "loss": 0.3457, "lr": 1.0375984599138633e-05, "epoch": 18.831236897274632, "percentage": 94.16, "elapsed_time": "1:31:54", "remaining_time": "0:05:42", "throughput": 4251.29, "total_tokens": 23442832}
7205
+ {"current_steps": 35935, "total_steps": 38160, "loss": 0.3674, "lr": 1.0329689117224261e-05, "epoch": 18.83385744234801, "percentage": 94.17, "elapsed_time": "1:31:55", "remaining_time": "0:05:41", "throughput": 4251.35, "total_tokens": 23446416}
7206
+ {"current_steps": 35940, "total_steps": 38160, "loss": 0.399, "lr": 1.0283496068752507e-05, "epoch": 18.836477987421382, "percentage": 94.18, "elapsed_time": "1:31:55", "remaining_time": "0:05:40", "throughput": 4251.34, "total_tokens": 23449104}
7207
+ {"current_steps": 35945, "total_steps": 38160, "loss": 0.4735, "lr": 1.0237405463386418e-05, "epoch": 18.83909853249476, "percentage": 94.2, "elapsed_time": "1:31:56", "remaining_time": "0:05:39", "throughput": 4251.36, "total_tokens": 23452272}
7208
+ {"current_steps": 35950, "total_steps": 38160, "loss": 0.4437, "lr": 1.0191417310767503e-05, "epoch": 18.841719077568133, "percentage": 94.21, "elapsed_time": "1:31:57", "remaining_time": "0:05:39", "throughput": 4251.35, "total_tokens": 23455088}
7209
+ {"current_steps": 35955, "total_steps": 38160, "loss": 0.4573, "lr": 1.0145531620516179e-05, "epoch": 18.84433962264151, "percentage": 94.22, "elapsed_time": "1:31:57", "remaining_time": "0:05:38", "throughput": 4251.41, "total_tokens": 23458704}
7210
+ {"current_steps": 35960, "total_steps": 38160, "loss": 0.4269, "lr": 1.009974840223099e-05, "epoch": 18.846960167714883, "percentage": 94.23, "elapsed_time": "1:31:58", "remaining_time": "0:05:37", "throughput": 4251.5, "total_tokens": 23462704}
7211
+ {"current_steps": 35965, "total_steps": 38160, "loss": 0.3427, "lr": 1.0054067665489386e-05, "epoch": 18.84958071278826, "percentage": 94.25, "elapsed_time": "1:31:59", "remaining_time": "0:05:36", "throughput": 4251.48, "total_tokens": 23465328}
7212
+ {"current_steps": 35970, "total_steps": 38160, "loss": 0.4781, "lr": 1.0008489419847278e-05, "epoch": 18.852201257861637, "percentage": 94.26, "elapsed_time": "1:32:00", "remaining_time": "0:05:36", "throughput": 4251.55, "total_tokens": 23468912}
7213
+ {"current_steps": 35975, "total_steps": 38160, "loss": 0.3724, "lr": 9.963013674839038e-06, "epoch": 18.85482180293501, "percentage": 94.27, "elapsed_time": "1:32:00", "remaining_time": "0:05:35", "throughput": 4251.56, "total_tokens": 23471984}
7214
+ {"current_steps": 35980, "total_steps": 38160, "loss": 0.3482, "lr": 9.91764043997767e-06, "epoch": 18.857442348008387, "percentage": 94.29, "elapsed_time": "1:32:01", "remaining_time": "0:05:34", "throughput": 4251.63, "total_tokens": 23475888}
7215
+ {"current_steps": 35985, "total_steps": 38160, "loss": 0.3959, "lr": 9.872369724754804e-06, "epoch": 18.86006289308176, "percentage": 94.3, "elapsed_time": "1:32:02", "remaining_time": "0:05:33", "throughput": 4251.63, "total_tokens": 23478704}
7216
+ {"current_steps": 35990, "total_steps": 38160, "loss": 0.3791, "lr": 9.827201538640473e-06, "epoch": 18.862683438155138, "percentage": 94.31, "elapsed_time": "1:32:02", "remaining_time": "0:05:33", "throughput": 4251.59, "total_tokens": 23481136}
7217
+ {"current_steps": 35995, "total_steps": 38160, "loss": 0.3275, "lr": 9.782135891083455e-06, "epoch": 18.86530398322851, "percentage": 94.33, "elapsed_time": "1:32:03", "remaining_time": "0:05:32", "throughput": 4251.53, "total_tokens": 23483408}
7218
+ {"current_steps": 36000, "total_steps": 38160, "loss": 0.4428, "lr": 9.737172791510873e-06, "epoch": 18.867924528301888, "percentage": 94.34, "elapsed_time": "1:32:04", "remaining_time": "0:05:31", "throughput": 4251.63, "total_tokens": 23487568}
7219
+ {"current_steps": 36005, "total_steps": 38160, "loss": 0.486, "lr": 9.692312249328483e-06, "epoch": 18.87054507337526, "percentage": 94.35, "elapsed_time": "1:32:05", "remaining_time": "0:05:30", "throughput": 4251.63, "total_tokens": 23490512}
7220
+ {"current_steps": 36010, "total_steps": 38160, "loss": 0.4898, "lr": 9.647554273920722e-06, "epoch": 18.87316561844864, "percentage": 94.37, "elapsed_time": "1:32:05", "remaining_time": "0:05:29", "throughput": 4251.66, "total_tokens": 23493680}
7221
+ {"current_steps": 36015, "total_steps": 38160, "loss": 0.4379, "lr": 9.602898874650323e-06, "epoch": 18.87578616352201, "percentage": 94.38, "elapsed_time": "1:32:06", "remaining_time": "0:05:29", "throughput": 4251.75, "total_tokens": 23497680}
7222
+ {"current_steps": 36020, "total_steps": 38160, "loss": 0.4638, "lr": 9.558346060858759e-06, "epoch": 18.87840670859539, "percentage": 94.39, "elapsed_time": "1:32:07", "remaining_time": "0:05:28", "throughput": 4251.74, "total_tokens": 23500464}
7223
+ {"current_steps": 36025, "total_steps": 38160, "loss": 0.3628, "lr": 9.51389584186596e-06, "epoch": 18.881027253668762, "percentage": 94.41, "elapsed_time": "1:32:07", "remaining_time": "0:05:27", "throughput": 4251.73, "total_tokens": 23503184}
7224
+ {"current_steps": 36030, "total_steps": 38160, "loss": 0.4666, "lr": 9.469548226970326e-06, "epoch": 18.88364779874214, "percentage": 94.42, "elapsed_time": "1:32:08", "remaining_time": "0:05:26", "throughput": 4251.72, "total_tokens": 23506064}
7225
+ {"current_steps": 36035, "total_steps": 38160, "loss": 0.4821, "lr": 9.425303225448989e-06, "epoch": 18.886268343815512, "percentage": 94.43, "elapsed_time": "1:32:09", "remaining_time": "0:05:26", "throughput": 4251.71, "total_tokens": 23508848}
7226
+ {"current_steps": 36040, "total_steps": 38160, "loss": 0.4835, "lr": 9.381160846557435e-06, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "1:32:10", "remaining_time": "0:05:25", "throughput": 4251.77, "total_tokens": 23512432}
7227
+ {"current_steps": 36045, "total_steps": 38160, "loss": 0.5042, "lr": 9.337121099529722e-06, "epoch": 18.891509433962263, "percentage": 94.46, "elapsed_time": "1:32:10", "remaining_time": "0:05:24", "throughput": 4251.76, "total_tokens": 23515312}
7228
+ {"current_steps": 36050, "total_steps": 38160, "loss": 0.4751, "lr": 9.293183993578535e-06, "epoch": 18.89412997903564, "percentage": 94.47, "elapsed_time": "1:32:11", "remaining_time": "0:05:23", "throughput": 4251.83, "total_tokens": 23519024}
7229
+ {"current_steps": 36055, "total_steps": 38160, "loss": 0.3286, "lr": 9.249349537894968e-06, "epoch": 18.896750524109013, "percentage": 94.48, "elapsed_time": "1:32:12", "remaining_time": "0:05:22", "throughput": 4251.85, "total_tokens": 23522032}
7230
+ {"current_steps": 36060, "total_steps": 38160, "loss": 0.5296, "lr": 9.205617741648686e-06, "epoch": 18.89937106918239, "percentage": 94.5, "elapsed_time": "1:32:12", "remaining_time": "0:05:22", "throughput": 4251.85, "total_tokens": 23524912}
7231
+ {"current_steps": 36065, "total_steps": 38160, "loss": 0.3009, "lr": 9.161988613987982e-06, "epoch": 18.901991614255767, "percentage": 94.51, "elapsed_time": "1:32:13", "remaining_time": "0:05:21", "throughput": 4251.86, "total_tokens": 23527984}
7232
+ {"current_steps": 36070, "total_steps": 38160, "loss": 0.4853, "lr": 9.118462164039387e-06, "epoch": 18.90461215932914, "percentage": 94.52, "elapsed_time": "1:32:14", "remaining_time": "0:05:20", "throughput": 4251.94, "total_tokens": 23531792}
7233
+ {"current_steps": 36075, "total_steps": 38160, "loss": 0.415, "lr": 9.07503840090823e-06, "epoch": 18.907232704402517, "percentage": 94.54, "elapsed_time": "1:32:15", "remaining_time": "0:05:19", "throughput": 4252.01, "total_tokens": 23535504}
7234
+ {"current_steps": 36080, "total_steps": 38160, "loss": 0.4045, "lr": 9.031717333678303e-06, "epoch": 18.90985324947589, "percentage": 94.55, "elapsed_time": "1:32:15", "remaining_time": "0:05:19", "throughput": 4252.06, "total_tokens": 23538864}
7235
+ {"current_steps": 36085, "total_steps": 38160, "loss": 0.4879, "lr": 8.988498971411851e-06, "epoch": 18.912473794549268, "percentage": 94.56, "elapsed_time": "1:32:16", "remaining_time": "0:05:18", "throughput": 4252.14, "total_tokens": 23542736}
7236
+ {"current_steps": 36090, "total_steps": 38160, "loss": 0.4748, "lr": 8.945383323149647e-06, "epoch": 18.91509433962264, "percentage": 94.58, "elapsed_time": "1:32:17", "remaining_time": "0:05:17", "throughput": 4252.3, "total_tokens": 23547504}
7237
+ {"current_steps": 36095, "total_steps": 38160, "loss": 0.3808, "lr": 8.902370397911031e-06, "epoch": 18.917714884696018, "percentage": 94.59, "elapsed_time": "1:32:18", "remaining_time": "0:05:16", "throughput": 4252.28, "total_tokens": 23550224}
7238
+ {"current_steps": 36100, "total_steps": 38160, "loss": 0.5029, "lr": 8.859460204693748e-06, "epoch": 18.92033542976939, "percentage": 94.6, "elapsed_time": "1:32:18", "remaining_time": "0:05:16", "throughput": 4252.32, "total_tokens": 23553488}
7239
+ {"current_steps": 36105, "total_steps": 38160, "loss": 0.5101, "lr": 8.816652752474175e-06, "epoch": 18.92295597484277, "percentage": 94.61, "elapsed_time": "1:32:19", "remaining_time": "0:05:15", "throughput": 4252.35, "total_tokens": 23556816}
7240
+ {"current_steps": 36110, "total_steps": 38160, "loss": 0.3153, "lr": 8.773948050207148e-06, "epoch": 18.92557651991614, "percentage": 94.63, "elapsed_time": "1:32:20", "remaining_time": "0:05:14", "throughput": 4252.32, "total_tokens": 23559280}
7241
+ {"current_steps": 36115, "total_steps": 38160, "loss": 0.5044, "lr": 8.731346106826021e-06, "epoch": 18.92819706498952, "percentage": 94.64, "elapsed_time": "1:32:21", "remaining_time": "0:05:13", "throughput": 4252.33, "total_tokens": 23562416}
7242
+ {"current_steps": 36120, "total_steps": 38160, "loss": 0.3342, "lr": 8.6888469312425e-06, "epoch": 18.930817610062892, "percentage": 94.65, "elapsed_time": "1:32:21", "remaining_time": "0:05:12", "throughput": 4252.39, "total_tokens": 23566032}
7243
+ {"current_steps": 36125, "total_steps": 38160, "loss": 0.484, "lr": 8.646450532347083e-06, "epoch": 18.93343815513627, "percentage": 94.67, "elapsed_time": "1:32:22", "remaining_time": "0:05:12", "throughput": 4252.39, "total_tokens": 23568880}
7244
+ {"current_steps": 36130, "total_steps": 38160, "loss": 0.3792, "lr": 8.604156919008565e-06, "epoch": 18.936058700209642, "percentage": 94.68, "elapsed_time": "1:32:23", "remaining_time": "0:05:11", "throughput": 4252.34, "total_tokens": 23571216}
7245
+ {"current_steps": 36135, "total_steps": 38160, "loss": 0.5367, "lr": 8.561966100074258e-06, "epoch": 18.93867924528302, "percentage": 94.69, "elapsed_time": "1:32:23", "remaining_time": "0:05:10", "throughput": 4252.45, "total_tokens": 23575216}
7246
+ {"current_steps": 36140, "total_steps": 38160, "loss": 0.401, "lr": 8.519878084370048e-06, "epoch": 18.941299790356393, "percentage": 94.71, "elapsed_time": "1:32:24", "remaining_time": "0:05:09", "throughput": 4252.49, "total_tokens": 23578576}
7247
+ {"current_steps": 36145, "total_steps": 38160, "loss": 0.3816, "lr": 8.477892880700222e-06, "epoch": 18.94392033542977, "percentage": 94.72, "elapsed_time": "1:32:25", "remaining_time": "0:05:09", "throughput": 4252.43, "total_tokens": 23580848}
7248
+ {"current_steps": 36150, "total_steps": 38160, "loss": 0.5002, "lr": 8.436010497847646e-06, "epoch": 18.946540880503143, "percentage": 94.73, "elapsed_time": "1:32:25", "remaining_time": "0:05:08", "throughput": 4252.41, "total_tokens": 23583440}
7249
+ {"current_steps": 36155, "total_steps": 38160, "loss": 0.3324, "lr": 8.39423094457359e-06, "epoch": 18.94916142557652, "percentage": 94.75, "elapsed_time": "1:32:26", "remaining_time": "0:05:07", "throughput": 4252.44, "total_tokens": 23586640}
7250
+ {"current_steps": 36160, "total_steps": 38160, "loss": 0.3794, "lr": 8.352554229617892e-06, "epoch": 18.951781970649897, "percentage": 94.76, "elapsed_time": "1:32:27", "remaining_time": "0:05:06", "throughput": 4252.48, "total_tokens": 23589968}
7251
+ {"current_steps": 36165, "total_steps": 38160, "loss": 0.3504, "lr": 8.310980361698861e-06, "epoch": 18.95440251572327, "percentage": 94.77, "elapsed_time": "1:32:27", "remaining_time": "0:05:06", "throughput": 4252.44, "total_tokens": 23592368}
7252
+ {"current_steps": 36170, "total_steps": 38160, "loss": 0.3765, "lr": 8.26950934951326e-06, "epoch": 18.957023060796647, "percentage": 94.79, "elapsed_time": "1:32:28", "remaining_time": "0:05:05", "throughput": 4252.44, "total_tokens": 23595248}
7253
+ {"current_steps": 36175, "total_steps": 38160, "loss": 0.4357, "lr": 8.22814120173626e-06, "epoch": 18.95964360587002, "percentage": 94.8, "elapsed_time": "1:32:29", "remaining_time": "0:05:04", "throughput": 4252.47, "total_tokens": 23598384}
7254
+ {"current_steps": 36180, "total_steps": 38160, "loss": 0.481, "lr": 8.186875927021775e-06, "epoch": 18.962264150943398, "percentage": 94.81, "elapsed_time": "1:32:30", "remaining_time": "0:05:03", "throughput": 4252.53, "total_tokens": 23602128}
7255
+ {"current_steps": 36185, "total_steps": 38160, "loss": 0.316, "lr": 8.145713534001897e-06, "epoch": 18.96488469601677, "percentage": 94.82, "elapsed_time": "1:32:30", "remaining_time": "0:05:02", "throughput": 4252.55, "total_tokens": 23605232}
7256
+ {"current_steps": 36190, "total_steps": 38160, "loss": 0.3828, "lr": 8.104654031287406e-06, "epoch": 18.967505241090148, "percentage": 94.84, "elapsed_time": "1:32:31", "remaining_time": "0:05:02", "throughput": 4252.56, "total_tokens": 23608272}
7257
+ {"current_steps": 36195, "total_steps": 38160, "loss": 0.3777, "lr": 8.063697427467486e-06, "epoch": 18.97012578616352, "percentage": 94.85, "elapsed_time": "1:32:32", "remaining_time": "0:05:01", "throughput": 4252.51, "total_tokens": 23610576}
7258
+ {"current_steps": 36200, "total_steps": 38160, "loss": 0.4814, "lr": 8.022843731109675e-06, "epoch": 18.9727463312369, "percentage": 94.86, "elapsed_time": "1:32:32", "remaining_time": "0:05:00", "throughput": 4252.56, "total_tokens": 23614000}
7259
+ {"current_steps": 36205, "total_steps": 38160, "loss": 0.3688, "lr": 7.982092950760245e-06, "epoch": 18.97536687631027, "percentage": 94.88, "elapsed_time": "1:32:33", "remaining_time": "0:04:59", "throughput": 4252.59, "total_tokens": 23617264}
7260
+ {"current_steps": 36210, "total_steps": 38160, "loss": 0.4542, "lr": 7.941445094943711e-06, "epoch": 18.97798742138365, "percentage": 94.89, "elapsed_time": "1:32:34", "remaining_time": "0:04:59", "throughput": 4252.59, "total_tokens": 23620240}
7261
+ {"current_steps": 36215, "total_steps": 38160, "loss": 0.3813, "lr": 7.900900172163107e-06, "epoch": 18.980607966457022, "percentage": 94.9, "elapsed_time": "1:32:35", "remaining_time": "0:04:58", "throughput": 4252.6, "total_tokens": 23623216}
7262
+ {"current_steps": 36220, "total_steps": 38160, "loss": 0.3715, "lr": 7.860458190900144e-06, "epoch": 18.9832285115304, "percentage": 94.92, "elapsed_time": "1:32:35", "remaining_time": "0:04:57", "throughput": 4252.6, "total_tokens": 23626064}
7263
+ {"current_steps": 36225, "total_steps": 38160, "loss": 0.4644, "lr": 7.820119159614669e-06, "epoch": 18.985849056603772, "percentage": 94.93, "elapsed_time": "1:32:36", "remaining_time": "0:04:56", "throughput": 4252.64, "total_tokens": 23629360}
7264
+ {"current_steps": 36230, "total_steps": 38160, "loss": 0.4304, "lr": 7.779883086745098e-06, "epoch": 18.98846960167715, "percentage": 94.94, "elapsed_time": "1:32:37", "remaining_time": "0:04:56", "throughput": 4252.6, "total_tokens": 23631792}
7265
+ {"current_steps": 36235, "total_steps": 38160, "loss": 0.3842, "lr": 7.739749980708533e-06, "epoch": 18.991090146750523, "percentage": 94.96, "elapsed_time": "1:32:37", "remaining_time": "0:04:55", "throughput": 4252.74, "total_tokens": 23636400}
7266
+ {"current_steps": 36240, "total_steps": 38160, "loss": 0.4036, "lr": 7.6997198499002e-06, "epoch": 18.9937106918239, "percentage": 94.97, "elapsed_time": "1:32:38", "remaining_time": "0:04:54", "throughput": 4252.73, "total_tokens": 23639056}
7267
+ {"current_steps": 36245, "total_steps": 38160, "loss": 0.3463, "lr": 7.659792702694068e-06, "epoch": 18.996331236897273, "percentage": 94.98, "elapsed_time": "1:32:39", "remaining_time": "0:04:53", "throughput": 4252.79, "total_tokens": 23642672}
7268
+ {"current_steps": 36250, "total_steps": 38160, "loss": 0.402, "lr": 7.619968547442346e-06, "epoch": 18.99895178197065, "percentage": 94.99, "elapsed_time": "1:32:39", "remaining_time": "0:04:52", "throughput": 4252.72, "total_tokens": 23644848}
7269
+ {"current_steps": 36252, "total_steps": 38160, "eval_loss": 0.46669089794158936, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "1:32:53", "remaining_time": "0:04:53", "throughput": 4242.19, "total_tokens": 23645440}
7270
+ {"current_steps": 36255, "total_steps": 38160, "loss": 0.3034, "lr": 7.580247392475926e-06, "epoch": 19.001572327044027, "percentage": 95.01, "elapsed_time": "1:32:56", "remaining_time": "0:04:52", "throughput": 4240.83, "total_tokens": 23647040}
7271
+ {"current_steps": 36260, "total_steps": 38160, "loss": 0.4126, "lr": 7.540629246103825e-06, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:32:56", "remaining_time": "0:04:52", "throughput": 4240.82, "total_tokens": 23649760}
7272
+ {"current_steps": 36265, "total_steps": 38160, "loss": 0.3458, "lr": 7.501114116613861e-06, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:32:57", "remaining_time": "0:04:51", "throughput": 4240.82, "total_tokens": 23652672}
7273
+ {"current_steps": 36270, "total_steps": 38160, "loss": 0.4088, "lr": 7.461702012272087e-06, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:32:58", "remaining_time": "0:04:50", "throughput": 4240.83, "total_tokens": 23655584}