rbelanec commited on
Commit
52e6cbc
·
verified ·
1 Parent(s): fd6273a

Training in progress, step 16520

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +165 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7d51d738cbb5a567a4826fbdc9266200e484b0ef298364fc0489ea62e61b09a
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20b3b71ffd76e03071a52377ca5d9e6b0852be9f1fb89b67f4162f15fbcfd583
3
  size 798032
trainer_log.jsonl CHANGED
@@ -3158,3 +3158,168 @@
3158
  {"current_steps": 15695, "total_steps": 16520, "loss": 0.0971, "lr": 3.7980617469479953e-07, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:42:54", "remaining_time": "0:02:15", "throughput": 2499.06, "total_tokens": 6434832}
3159
  {"current_steps": 15700, "total_steps": 16520, "loss": 0.1391, "lr": 3.7523345633957153e-07, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:42:55", "remaining_time": "0:02:14", "throughput": 2498.98, "total_tokens": 6436976}
3160
  {"current_steps": 15705, "total_steps": 16520, "loss": 0.0693, "lr": 3.706882236798298e-07, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:42:56", "remaining_time": "0:02:13", "throughput": 2499.03, "total_tokens": 6438896}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3158
  {"current_steps": 15695, "total_steps": 16520, "loss": 0.0971, "lr": 3.7980617469479953e-07, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:42:54", "remaining_time": "0:02:15", "throughput": 2499.06, "total_tokens": 6434832}
3159
  {"current_steps": 15700, "total_steps": 16520, "loss": 0.1391, "lr": 3.7523345633957153e-07, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:42:55", "remaining_time": "0:02:14", "throughput": 2498.98, "total_tokens": 6436976}
3160
  {"current_steps": 15705, "total_steps": 16520, "loss": 0.0693, "lr": 3.706882236798298e-07, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:42:56", "remaining_time": "0:02:13", "throughput": 2499.03, "total_tokens": 6438896}
3161
+ {"current_steps": 15710, "total_steps": 16520, "loss": 0.0771, "lr": 3.6617048178887725e-07, "epoch": 19.019370460048425, "percentage": 95.1, "elapsed_time": "0:42:57", "remaining_time": "0:02:12", "throughput": 2499.04, "total_tokens": 6441040}
3162
+ {"current_steps": 15715, "total_steps": 16520, "loss": 0.0816, "lr": 3.6168023570933297e-07, "epoch": 19.02542372881356, "percentage": 95.13, "elapsed_time": "0:42:58", "remaining_time": "0:02:12", "throughput": 2499.09, "total_tokens": 6443056}
3163
+ {"current_steps": 15720, "total_steps": 16520, "loss": 0.0364, "lr": 3.5721749045312114e-07, "epoch": 19.031476997578693, "percentage": 95.16, "elapsed_time": "0:42:58", "remaining_time": "0:02:11", "throughput": 2499.13, "total_tokens": 6445136}
3164
+ {"current_steps": 15725, "total_steps": 16520, "loss": 0.0878, "lr": 3.5278225100147667e-07, "epoch": 19.037530266343826, "percentage": 95.19, "elapsed_time": "0:42:59", "remaining_time": "0:02:10", "throughput": 2499.19, "total_tokens": 6447504}
3165
+ {"current_steps": 15730, "total_steps": 16520, "loss": 0.0897, "lr": 3.4837452230492284e-07, "epoch": 19.043583535108958, "percentage": 95.22, "elapsed_time": "0:43:00", "remaining_time": "0:02:09", "throughput": 2499.23, "total_tokens": 6449488}
3166
+ {"current_steps": 15735, "total_steps": 16520, "loss": 0.1066, "lr": 3.439943092832909e-07, "epoch": 19.04963680387409, "percentage": 95.25, "elapsed_time": "0:43:01", "remaining_time": "0:02:08", "throughput": 2499.26, "total_tokens": 6451536}
3167
+ {"current_steps": 15740, "total_steps": 16520, "loss": 0.1064, "lr": 3.3964161682568663e-07, "epoch": 19.055690072639226, "percentage": 95.28, "elapsed_time": "0:43:02", "remaining_time": "0:02:07", "throughput": 2499.3, "total_tokens": 6453584}
3168
+ {"current_steps": 15745, "total_steps": 16520, "loss": 0.1836, "lr": 3.353164497904987e-07, "epoch": 19.06174334140436, "percentage": 95.31, "elapsed_time": "0:43:02", "remaining_time": "0:02:07", "throughput": 2499.35, "total_tokens": 6455600}
3169
+ {"current_steps": 15750, "total_steps": 16520, "loss": 0.117, "lr": 3.31018813005407e-07, "epoch": 19.06779661016949, "percentage": 95.34, "elapsed_time": "0:43:03", "remaining_time": "0:02:06", "throughput": 2499.4, "total_tokens": 6457616}
3170
+ {"current_steps": 15755, "total_steps": 16520, "loss": 0.1036, "lr": 3.267487112673412e-07, "epoch": 19.073849878934624, "percentage": 95.37, "elapsed_time": "0:43:04", "remaining_time": "0:02:05", "throughput": 2499.39, "total_tokens": 6459632}
3171
+ {"current_steps": 15760, "total_steps": 16520, "loss": 0.1471, "lr": 3.225061493425108e-07, "epoch": 19.079903147699756, "percentage": 95.4, "elapsed_time": "0:43:05", "remaining_time": "0:02:04", "throughput": 2499.44, "total_tokens": 6461648}
3172
+ {"current_steps": 15765, "total_steps": 16520, "loss": 0.1252, "lr": 3.1829113196638614e-07, "epoch": 19.085956416464892, "percentage": 95.43, "elapsed_time": "0:43:05", "remaining_time": "0:02:03", "throughput": 2499.47, "total_tokens": 6463600}
3173
+ {"current_steps": 15770, "total_steps": 16520, "loss": 0.0448, "lr": 3.141036638436845e-07, "epoch": 19.092009685230025, "percentage": 95.46, "elapsed_time": "0:43:06", "remaining_time": "0:02:03", "throughput": 2499.5, "total_tokens": 6465712}
3174
+ {"current_steps": 15775, "total_steps": 16520, "loss": 0.0627, "lr": 3.099437496483837e-07, "epoch": 19.098062953995157, "percentage": 95.49, "elapsed_time": "0:43:07", "remaining_time": "0:02:02", "throughput": 2499.54, "total_tokens": 6467792}
3175
+ {"current_steps": 15780, "total_steps": 16520, "loss": 0.1584, "lr": 3.058113940236945e-07, "epoch": 19.10411622276029, "percentage": 95.52, "elapsed_time": "0:43:08", "remaining_time": "0:02:01", "throughput": 2499.57, "total_tokens": 6469744}
3176
+ {"current_steps": 15785, "total_steps": 16520, "loss": 0.0776, "lr": 3.017066015820774e-07, "epoch": 19.110169491525422, "percentage": 95.55, "elapsed_time": "0:43:09", "remaining_time": "0:02:00", "throughput": 2499.64, "total_tokens": 6471792}
3177
+ {"current_steps": 15790, "total_steps": 16520, "loss": 0.1581, "lr": 2.976293769052202e-07, "epoch": 19.116222760290558, "percentage": 95.58, "elapsed_time": "0:43:09", "remaining_time": "0:01:59", "throughput": 2499.63, "total_tokens": 6473744}
3178
+ {"current_steps": 15795, "total_steps": 16520, "loss": 0.0986, "lr": 2.9357972454404637e-07, "epoch": 19.12227602905569, "percentage": 95.61, "elapsed_time": "0:43:10", "remaining_time": "0:01:58", "throughput": 2499.63, "total_tokens": 6475856}
3179
+ {"current_steps": 15800, "total_steps": 16520, "loss": 0.1289, "lr": 2.895576490187041e-07, "epoch": 19.128329297820823, "percentage": 95.64, "elapsed_time": "0:43:11", "remaining_time": "0:01:58", "throughput": 2499.66, "total_tokens": 6477872}
3180
+ {"current_steps": 15805, "total_steps": 16520, "loss": 0.1374, "lr": 2.8556315481854943e-07, "epoch": 19.134382566585955, "percentage": 95.67, "elapsed_time": "0:43:12", "remaining_time": "0:01:57", "throughput": 2499.72, "total_tokens": 6480112}
3181
+ {"current_steps": 15810, "total_steps": 16520, "loss": 0.0719, "lr": 2.8159624640216597e-07, "epoch": 19.140435835351088, "percentage": 95.7, "elapsed_time": "0:43:13", "remaining_time": "0:01:56", "throughput": 2499.73, "total_tokens": 6482160}
3182
+ {"current_steps": 15815, "total_steps": 16520, "loss": 0.0964, "lr": 2.7765692819734236e-07, "epoch": 19.146489104116224, "percentage": 95.73, "elapsed_time": "0:43:13", "remaining_time": "0:01:55", "throughput": 2499.76, "total_tokens": 6484176}
3183
+ {"current_steps": 15820, "total_steps": 16520, "loss": 0.0666, "lr": 2.737452046010641e-07, "epoch": 19.152542372881356, "percentage": 95.76, "elapsed_time": "0:43:14", "remaining_time": "0:01:54", "throughput": 2499.79, "total_tokens": 6486256}
3184
+ {"current_steps": 15825, "total_steps": 16520, "loss": 0.1174, "lr": 2.6986107997953035e-07, "epoch": 19.15859564164649, "percentage": 95.79, "elapsed_time": "0:43:15", "remaining_time": "0:01:53", "throughput": 2499.84, "total_tokens": 6488432}
3185
+ {"current_steps": 15830, "total_steps": 16520, "loss": 0.1104, "lr": 2.660045586681231e-07, "epoch": 19.16464891041162, "percentage": 95.82, "elapsed_time": "0:43:16", "remaining_time": "0:01:53", "throughput": 2499.88, "total_tokens": 6490320}
3186
+ {"current_steps": 15835, "total_steps": 16520, "loss": 0.0963, "lr": 2.621756449714158e-07, "epoch": 19.170702179176754, "percentage": 95.85, "elapsed_time": "0:43:17", "remaining_time": "0:01:52", "throughput": 2499.88, "total_tokens": 6492368}
3187
+ {"current_steps": 15840, "total_steps": 16520, "loss": 0.084, "lr": 2.5837434316317574e-07, "epoch": 19.17675544794189, "percentage": 95.88, "elapsed_time": "0:43:17", "remaining_time": "0:01:51", "throughput": 2499.9, "total_tokens": 6494448}
3188
+ {"current_steps": 15845, "total_steps": 16520, "loss": 0.1969, "lr": 2.546006574863369e-07, "epoch": 19.182808716707022, "percentage": 95.91, "elapsed_time": "0:43:18", "remaining_time": "0:01:50", "throughput": 2499.93, "total_tokens": 6496400}
3189
+ {"current_steps": 15850, "total_steps": 16520, "loss": 0.0686, "lr": 2.508545921530159e-07, "epoch": 19.188861985472155, "percentage": 95.94, "elapsed_time": "0:43:19", "remaining_time": "0:01:49", "throughput": 2499.97, "total_tokens": 6498480}
3190
+ {"current_steps": 15855, "total_steps": 16520, "loss": 0.0536, "lr": 2.47136151344507e-07, "epoch": 19.194915254237287, "percentage": 95.97, "elapsed_time": "0:43:20", "remaining_time": "0:01:49", "throughput": 2499.99, "total_tokens": 6500464}
3191
+ {"current_steps": 15860, "total_steps": 16520, "loss": 0.0675, "lr": 2.43445339211254e-07, "epoch": 19.20096852300242, "percentage": 96.0, "elapsed_time": "0:43:21", "remaining_time": "0:01:48", "throughput": 2499.99, "total_tokens": 6502512}
3192
+ {"current_steps": 15865, "total_steps": 16520, "loss": 0.0937, "lr": 2.3978215987287554e-07, "epoch": 19.207021791767556, "percentage": 96.04, "elapsed_time": "0:43:21", "remaining_time": "0:01:47", "throughput": 2500.01, "total_tokens": 6504432}
3193
+ {"current_steps": 15870, "total_steps": 16520, "loss": 0.0529, "lr": 2.361466174181426e-07, "epoch": 19.213075060532688, "percentage": 96.07, "elapsed_time": "0:43:22", "remaining_time": "0:01:46", "throughput": 2500.07, "total_tokens": 6506352}
3194
+ {"current_steps": 15875, "total_steps": 16520, "loss": 0.0747, "lr": 2.3253871590497856e-07, "epoch": 19.21912832929782, "percentage": 96.1, "elapsed_time": "0:43:23", "remaining_time": "0:01:45", "throughput": 2500.1, "total_tokens": 6508400}
3195
+ {"current_steps": 15880, "total_steps": 16520, "loss": 0.1487, "lr": 2.28958459360451e-07, "epoch": 19.225181598062953, "percentage": 96.13, "elapsed_time": "0:43:24", "remaining_time": "0:01:44", "throughput": 2500.13, "total_tokens": 6510512}
3196
+ {"current_steps": 15885, "total_steps": 16520, "loss": 0.0777, "lr": 2.2540585178078e-07, "epoch": 19.231234866828085, "percentage": 96.16, "elapsed_time": "0:43:24", "remaining_time": "0:01:44", "throughput": 2500.16, "total_tokens": 6512560}
3197
+ {"current_steps": 15890, "total_steps": 16520, "loss": 0.1394, "lr": 2.21880897131313e-07, "epoch": 19.23728813559322, "percentage": 96.19, "elapsed_time": "0:43:25", "remaining_time": "0:01:43", "throughput": 2500.2, "total_tokens": 6514544}
3198
+ {"current_steps": 15895, "total_steps": 16520, "loss": 0.1119, "lr": 2.1838359934653884e-07, "epoch": 19.243341404358354, "percentage": 96.22, "elapsed_time": "0:43:26", "remaining_time": "0:01:42", "throughput": 2500.25, "total_tokens": 6516464}
3199
+ {"current_steps": 15900, "total_steps": 16520, "loss": 0.0562, "lr": 2.1491396233007665e-07, "epoch": 19.249394673123486, "percentage": 96.25, "elapsed_time": "0:43:27", "remaining_time": "0:01:41", "throughput": 2500.27, "total_tokens": 6518480}
3200
+ {"current_steps": 15905, "total_steps": 16520, "loss": 0.097, "lr": 2.1147198995466467e-07, "epoch": 19.25544794188862, "percentage": 96.28, "elapsed_time": "0:43:27", "remaining_time": "0:01:40", "throughput": 2500.27, "total_tokens": 6520592}
3201
+ {"current_steps": 15910, "total_steps": 16520, "loss": 0.111, "lr": 2.0805768606217412e-07, "epoch": 19.26150121065375, "percentage": 96.31, "elapsed_time": "0:43:28", "remaining_time": "0:01:40", "throughput": 2500.3, "total_tokens": 6522544}
3202
+ {"current_steps": 15915, "total_steps": 16520, "loss": 0.1377, "lr": 2.046710544635788e-07, "epoch": 19.267554479418887, "percentage": 96.34, "elapsed_time": "0:43:29", "remaining_time": "0:01:39", "throughput": 2500.34, "total_tokens": 6524688}
3203
+ {"current_steps": 15920, "total_steps": 16520, "loss": 0.1103, "lr": 2.0131209893897994e-07, "epoch": 19.27360774818402, "percentage": 96.37, "elapsed_time": "0:43:30", "remaining_time": "0:01:38", "throughput": 2500.37, "total_tokens": 6526704}
3204
+ {"current_steps": 15925, "total_steps": 16520, "loss": 0.1538, "lr": 1.9798082323757016e-07, "epoch": 19.279661016949152, "percentage": 96.4, "elapsed_time": "0:43:31", "remaining_time": "0:01:37", "throughput": 2500.41, "total_tokens": 6528688}
3205
+ {"current_steps": 15930, "total_steps": 16520, "loss": 0.1014, "lr": 1.94677231077664e-07, "epoch": 19.285714285714285, "percentage": 96.43, "elapsed_time": "0:43:31", "remaining_time": "0:01:36", "throughput": 2500.46, "total_tokens": 6530704}
3206
+ {"current_steps": 15935, "total_steps": 16520, "loss": 0.1056, "lr": 1.9140132614666463e-07, "epoch": 19.291767554479417, "percentage": 96.46, "elapsed_time": "0:43:32", "remaining_time": "0:01:35", "throughput": 2500.45, "total_tokens": 6532720}
3207
+ {"current_steps": 15940, "total_steps": 16520, "loss": 0.0894, "lr": 1.881531121010749e-07, "epoch": 19.297820823244553, "percentage": 96.49, "elapsed_time": "0:43:33", "remaining_time": "0:01:35", "throughput": 2500.45, "total_tokens": 6534832}
3208
+ {"current_steps": 15945, "total_steps": 16520, "loss": 0.0899, "lr": 1.8493259256649186e-07, "epoch": 19.303874092009686, "percentage": 96.52, "elapsed_time": "0:43:34", "remaining_time": "0:01:34", "throughput": 2500.5, "total_tokens": 6537008}
3209
+ {"current_steps": 15950, "total_steps": 16520, "loss": 0.0562, "lr": 1.8173977113759288e-07, "epoch": 19.309927360774818, "percentage": 96.55, "elapsed_time": "0:43:35", "remaining_time": "0:01:33", "throughput": 2500.51, "total_tokens": 6539056}
3210
+ {"current_steps": 15955, "total_steps": 16520, "loss": 0.061, "lr": 1.7857465137814944e-07, "epoch": 19.31598062953995, "percentage": 96.58, "elapsed_time": "0:43:35", "remaining_time": "0:01:32", "throughput": 2500.57, "total_tokens": 6541008}
3211
+ {"current_steps": 15960, "total_steps": 16520, "loss": 0.3004, "lr": 1.7543723682100777e-07, "epoch": 19.322033898305083, "percentage": 96.61, "elapsed_time": "0:43:36", "remaining_time": "0:01:31", "throughput": 2500.61, "total_tokens": 6542960}
3212
+ {"current_steps": 15965, "total_steps": 16520, "loss": 0.0643, "lr": 1.7232753096808607e-07, "epoch": 19.32808716707022, "percentage": 96.64, "elapsed_time": "0:43:37", "remaining_time": "0:01:30", "throughput": 2500.66, "total_tokens": 6545072}
3213
+ {"current_steps": 15970, "total_steps": 16520, "loss": 0.085, "lr": 1.6924553729038285e-07, "epoch": 19.33414043583535, "percentage": 96.67, "elapsed_time": "0:43:38", "remaining_time": "0:01:30", "throughput": 2500.71, "total_tokens": 6547248}
3214
+ {"current_steps": 15975, "total_steps": 16520, "loss": 0.08, "lr": 1.661912592279602e-07, "epoch": 19.340193704600484, "percentage": 96.7, "elapsed_time": "0:43:38", "remaining_time": "0:01:29", "throughput": 2500.76, "total_tokens": 6549424}
3215
+ {"current_steps": 15980, "total_steps": 16520, "loss": 0.1215, "lr": 1.6316470018994112e-07, "epoch": 19.346246973365616, "percentage": 96.73, "elapsed_time": "0:43:39", "remaining_time": "0:01:28", "throughput": 2500.78, "total_tokens": 6551440}
3216
+ {"current_steps": 15985, "total_steps": 16520, "loss": 0.1127, "lr": 1.6016586355452056e-07, "epoch": 19.352300242130752, "percentage": 96.76, "elapsed_time": "0:43:40", "remaining_time": "0:01:27", "throughput": 2500.85, "total_tokens": 6553392}
3217
+ {"current_steps": 15990, "total_steps": 16520, "loss": 0.0578, "lr": 1.571947526689349e-07, "epoch": 19.358353510895885, "percentage": 96.79, "elapsed_time": "0:43:41", "remaining_time": "0:01:26", "throughput": 2500.86, "total_tokens": 6555568}
3218
+ {"current_steps": 15995, "total_steps": 16520, "loss": 0.1106, "lr": 1.5425137084948692e-07, "epoch": 19.364406779661017, "percentage": 96.82, "elapsed_time": "0:43:42", "remaining_time": "0:01:26", "throughput": 2500.93, "total_tokens": 6557808}
3219
+ {"current_steps": 16000, "total_steps": 16520, "loss": 0.0871, "lr": 1.5133572138152364e-07, "epoch": 19.37046004842615, "percentage": 96.85, "elapsed_time": "0:43:42", "remaining_time": "0:01:25", "throughput": 2500.97, "total_tokens": 6559856}
3220
+ {"current_steps": 16005, "total_steps": 16520, "loss": 0.1203, "lr": 1.4844780751943345e-07, "epoch": 19.376513317191282, "percentage": 96.88, "elapsed_time": "0:43:43", "remaining_time": "0:01:24", "throughput": 2501.01, "total_tokens": 6561840}
3221
+ {"current_steps": 16010, "total_steps": 16520, "loss": 0.0681, "lr": 1.4558763248665175e-07, "epoch": 19.38256658595642, "percentage": 96.91, "elapsed_time": "0:43:44", "remaining_time": "0:01:23", "throughput": 2501.04, "total_tokens": 6563792}
3222
+ {"current_steps": 16015, "total_steps": 16520, "loss": 0.0904, "lr": 1.4275519947565542e-07, "epoch": 19.38861985472155, "percentage": 96.94, "elapsed_time": "0:43:45", "remaining_time": "0:01:22", "throughput": 2501.04, "total_tokens": 6565744}
3223
+ {"current_steps": 16020, "total_steps": 16520, "loss": 0.0953, "lr": 1.3995051164794604e-07, "epoch": 19.394673123486683, "percentage": 96.97, "elapsed_time": "0:43:45", "remaining_time": "0:01:21", "throughput": 2501.06, "total_tokens": 6567728}
3224
+ {"current_steps": 16025, "total_steps": 16520, "loss": 0.1126, "lr": 1.3717357213406667e-07, "epoch": 19.400726392251816, "percentage": 97.0, "elapsed_time": "0:43:46", "remaining_time": "0:01:21", "throughput": 2501.11, "total_tokens": 6569840}
3225
+ {"current_steps": 16030, "total_steps": 16520, "loss": 0.0564, "lr": 1.3442438403358515e-07, "epoch": 19.406779661016948, "percentage": 97.03, "elapsed_time": "0:43:47", "remaining_time": "0:01:20", "throughput": 2501.14, "total_tokens": 6571856}
3226
+ {"current_steps": 16035, "total_steps": 16520, "loss": 0.1452, "lr": 1.3170295041509128e-07, "epoch": 19.412832929782084, "percentage": 97.06, "elapsed_time": "0:43:48", "remaining_time": "0:01:19", "throughput": 2501.18, "total_tokens": 6574096}
3227
+ {"current_steps": 16040, "total_steps": 16520, "loss": 0.0729, "lr": 1.290092743161997e-07, "epoch": 19.418886198547217, "percentage": 97.09, "elapsed_time": "0:43:49", "remaining_time": "0:01:18", "throughput": 2501.2, "total_tokens": 6576208}
3228
+ {"current_steps": 16045, "total_steps": 16520, "loss": 0.0935, "lr": 1.2634335874353585e-07, "epoch": 19.42493946731235, "percentage": 97.12, "elapsed_time": "0:43:50", "remaining_time": "0:01:17", "throughput": 2501.21, "total_tokens": 6578352}
3229
+ {"current_steps": 16050, "total_steps": 16520, "loss": 0.1173, "lr": 1.2370520667274733e-07, "epoch": 19.43099273607748, "percentage": 97.15, "elapsed_time": "0:43:50", "remaining_time": "0:01:17", "throughput": 2501.21, "total_tokens": 6580368}
3230
+ {"current_steps": 16055, "total_steps": 16520, "loss": 0.1483, "lr": 1.2109482104848692e-07, "epoch": 19.437046004842614, "percentage": 97.19, "elapsed_time": "0:43:51", "remaining_time": "0:01:16", "throughput": 2501.23, "total_tokens": 6582384}
3231
+ {"current_steps": 16060, "total_steps": 16520, "loss": 0.0475, "lr": 1.1851220478442115e-07, "epoch": 19.44309927360775, "percentage": 97.22, "elapsed_time": "0:43:52", "remaining_time": "0:01:15", "throughput": 2501.25, "total_tokens": 6584464}
3232
+ {"current_steps": 16065, "total_steps": 16520, "loss": 0.2136, "lr": 1.1595736076321362e-07, "epoch": 19.449152542372882, "percentage": 97.25, "elapsed_time": "0:43:53", "remaining_time": "0:01:14", "throughput": 2501.31, "total_tokens": 6586416}
3233
+ {"current_steps": 16070, "total_steps": 16520, "loss": 0.0807, "lr": 1.134302918365332e-07, "epoch": 19.455205811138015, "percentage": 97.28, "elapsed_time": "0:43:53", "remaining_time": "0:01:13", "throughput": 2501.34, "total_tokens": 6588272}
3234
+ {"current_steps": 16075, "total_steps": 16520, "loss": 0.0745, "lr": 1.1093100082504581e-07, "epoch": 19.461259079903147, "percentage": 97.31, "elapsed_time": "0:43:54", "remaining_time": "0:01:12", "throughput": 2501.39, "total_tokens": 6590352}
3235
+ {"current_steps": 16080, "total_steps": 16520, "loss": 0.0968, "lr": 1.0845949051841441e-07, "epoch": 19.46731234866828, "percentage": 97.34, "elapsed_time": "0:43:55", "remaining_time": "0:01:12", "throughput": 2501.41, "total_tokens": 6592368}
3236
+ {"current_steps": 16085, "total_steps": 16520, "loss": 0.1033, "lr": 1.0601576367529065e-07, "epoch": 19.473365617433416, "percentage": 97.37, "elapsed_time": "0:43:56", "remaining_time": "0:01:11", "throughput": 2501.46, "total_tokens": 6594352}
3237
+ {"current_steps": 16090, "total_steps": 16520, "loss": 0.1622, "lr": 1.0359982302331484e-07, "epoch": 19.479418886198548, "percentage": 97.4, "elapsed_time": "0:43:56", "remaining_time": "0:01:10", "throughput": 2501.52, "total_tokens": 6596304}
3238
+ {"current_steps": 16095, "total_steps": 16520, "loss": 0.0878, "lr": 1.0121167125911601e-07, "epoch": 19.48547215496368, "percentage": 97.43, "elapsed_time": "0:43:57", "remaining_time": "0:01:09", "throughput": 2501.55, "total_tokens": 6598320}
3239
+ {"current_steps": 16100, "total_steps": 16520, "loss": 0.1023, "lr": 9.885131104830358e-08, "epoch": 19.491525423728813, "percentage": 97.46, "elapsed_time": "0:43:58", "remaining_time": "0:01:08", "throughput": 2501.57, "total_tokens": 6600496}
3240
+ {"current_steps": 16105, "total_steps": 16520, "loss": 0.0842, "lr": 9.651874502546454e-08, "epoch": 19.497578692493946, "percentage": 97.49, "elapsed_time": "0:43:59", "remaining_time": "0:01:08", "throughput": 2501.6, "total_tokens": 6602704}
3241
+ {"current_steps": 16110, "total_steps": 16520, "loss": 0.0744, "lr": 9.421397579416625e-08, "epoch": 19.50363196125908, "percentage": 97.52, "elapsed_time": "0:44:00", "remaining_time": "0:01:07", "throughput": 2501.62, "total_tokens": 6604784}
3242
+ {"current_steps": 16115, "total_steps": 16520, "loss": 0.0727, "lr": 9.193700592694532e-08, "epoch": 19.509685230024214, "percentage": 97.55, "elapsed_time": "0:44:00", "remaining_time": "0:01:06", "throughput": 2501.65, "total_tokens": 6606736}
3243
+ {"current_steps": 16120, "total_steps": 16520, "loss": 0.0366, "lr": 8.9687837965316e-08, "epoch": 19.515738498789347, "percentage": 97.58, "elapsed_time": "0:44:01", "remaining_time": "0:01:05", "throughput": 2501.64, "total_tokens": 6608752}
3244
+ {"current_steps": 16125, "total_steps": 16520, "loss": 0.0643, "lr": 8.74664744197562e-08, "epoch": 19.52179176755448, "percentage": 97.61, "elapsed_time": "0:44:02", "remaining_time": "0:01:04", "throughput": 2501.65, "total_tokens": 6610800}
3245
+ {"current_steps": 16130, "total_steps": 16520, "loss": 0.0584, "lr": 8.527291776970759e-08, "epoch": 19.52784503631961, "percentage": 97.64, "elapsed_time": "0:44:03", "remaining_time": "0:01:03", "throughput": 2501.7, "total_tokens": 6613040}
3246
+ {"current_steps": 16135, "total_steps": 16520, "loss": 0.0512, "lr": 8.310717046358108e-08, "epoch": 19.533898305084747, "percentage": 97.67, "elapsed_time": "0:44:04", "remaining_time": "0:01:03", "throughput": 2501.75, "total_tokens": 6615312}
3247
+ {"current_steps": 16140, "total_steps": 16520, "loss": 0.0478, "lr": 8.096923491873465e-08, "epoch": 19.53995157384988, "percentage": 97.7, "elapsed_time": "0:44:05", "remaining_time": "0:01:02", "throughput": 2501.78, "total_tokens": 6617424}
3248
+ {"current_steps": 16145, "total_steps": 16520, "loss": 0.0742, "lr": 7.885911352149832e-08, "epoch": 19.546004842615012, "percentage": 97.73, "elapsed_time": "0:44:05", "remaining_time": "0:01:01", "throughput": 2501.81, "total_tokens": 6619376}
3249
+ {"current_steps": 16150, "total_steps": 16520, "loss": 0.1319, "lr": 7.677680862714365e-08, "epoch": 19.552058111380145, "percentage": 97.76, "elapsed_time": "0:44:06", "remaining_time": "0:01:00", "throughput": 2501.84, "total_tokens": 6621520}
3250
+ {"current_steps": 16155, "total_steps": 16520, "loss": 0.0987, "lr": 7.472232255990585e-08, "epoch": 19.558111380145277, "percentage": 97.79, "elapsed_time": "0:44:07", "remaining_time": "0:00:59", "throughput": 2501.88, "total_tokens": 6623472}
3251
+ {"current_steps": 16160, "total_steps": 16520, "loss": 0.1098, "lr": 7.269565761295893e-08, "epoch": 19.564164648910413, "percentage": 97.82, "elapsed_time": "0:44:08", "remaining_time": "0:00:58", "throughput": 2501.88, "total_tokens": 6625424}
3252
+ {"current_steps": 16165, "total_steps": 16520, "loss": 0.0902, "lr": 7.069681604842949e-08, "epoch": 19.570217917675546, "percentage": 97.85, "elapsed_time": "0:44:08", "remaining_time": "0:00:58", "throughput": 2501.94, "total_tokens": 6627472}
3253
+ {"current_steps": 16170, "total_steps": 16520, "loss": 0.146, "lr": 6.872580009738283e-08, "epoch": 19.576271186440678, "percentage": 97.88, "elapsed_time": "0:44:09", "remaining_time": "0:00:57", "throughput": 2502.0, "total_tokens": 6629520}
3254
+ {"current_steps": 16175, "total_steps": 16520, "loss": 0.0739, "lr": 6.678261195983693e-08, "epoch": 19.58232445520581, "percentage": 97.91, "elapsed_time": "0:44:10", "remaining_time": "0:00:56", "throughput": 2502.04, "total_tokens": 6631472}
3255
+ {"current_steps": 16180, "total_steps": 16520, "loss": 0.1218, "lr": 6.486725380473457e-08, "epoch": 19.588377723970943, "percentage": 97.94, "elapsed_time": "0:44:11", "remaining_time": "0:00:55", "throughput": 2502.05, "total_tokens": 6633456}
3256
+ {"current_steps": 16185, "total_steps": 16520, "loss": 0.1735, "lr": 6.297972776996286e-08, "epoch": 19.59443099273608, "percentage": 97.97, "elapsed_time": "0:44:11", "remaining_time": "0:00:54", "throughput": 2502.09, "total_tokens": 6635504}
3257
+ {"current_steps": 16190, "total_steps": 16520, "loss": 0.0861, "lr": 6.112003596234484e-08, "epoch": 19.60048426150121, "percentage": 98.0, "elapsed_time": "0:44:12", "remaining_time": "0:00:54", "throughput": 2502.15, "total_tokens": 6637712}
3258
+ {"current_steps": 16195, "total_steps": 16520, "loss": 0.0759, "lr": 5.9288180457633954e-08, "epoch": 19.606537530266344, "percentage": 98.03, "elapsed_time": "0:44:13", "remaining_time": "0:00:53", "throughput": 2502.2, "total_tokens": 6639728}
3259
+ {"current_steps": 16200, "total_steps": 16520, "loss": 0.1311, "lr": 5.7484163300508545e-08, "epoch": 19.612590799031477, "percentage": 98.06, "elapsed_time": "0:44:14", "remaining_time": "0:00:52", "throughput": 2502.23, "total_tokens": 6641744}
3260
+ {"current_steps": 16205, "total_steps": 16520, "loss": 0.053, "lr": 5.570798650458009e-08, "epoch": 19.61864406779661, "percentage": 98.09, "elapsed_time": "0:44:15", "remaining_time": "0:00:51", "throughput": 2502.28, "total_tokens": 6643760}
3261
+ {"current_steps": 16210, "total_steps": 16520, "loss": 0.0521, "lr": 5.3959652052384954e-08, "epoch": 19.624697336561745, "percentage": 98.12, "elapsed_time": "0:44:15", "remaining_time": "0:00:50", "throughput": 2502.33, "total_tokens": 6645776}
3262
+ {"current_steps": 16215, "total_steps": 16520, "loss": 0.0774, "lr": 5.2239161895378806e-08, "epoch": 19.630750605326877, "percentage": 98.15, "elapsed_time": "0:44:16", "remaining_time": "0:00:49", "throughput": 2502.38, "total_tokens": 6647856}
3263
+ {"current_steps": 16220, "total_steps": 16520, "loss": 0.0532, "lr": 5.054651795393939e-08, "epoch": 19.63680387409201, "percentage": 98.18, "elapsed_time": "0:44:17", "remaining_time": "0:00:49", "throughput": 2502.4, "total_tokens": 6649936}
3264
+ {"current_steps": 16225, "total_steps": 16520, "loss": 0.1399, "lr": 4.888172211736375e-08, "epoch": 19.642857142857142, "percentage": 98.21, "elapsed_time": "0:44:18", "remaining_time": "0:00:48", "throughput": 2502.42, "total_tokens": 6651952}
3265
+ {"current_steps": 16230, "total_steps": 16520, "loss": 0.1209, "lr": 4.724477624386825e-08, "epoch": 19.648910411622275, "percentage": 98.24, "elapsed_time": "0:44:19", "remaining_time": "0:00:47", "throughput": 2502.45, "total_tokens": 6654064}
3266
+ {"current_steps": 16235, "total_steps": 16520, "loss": 0.1066, "lr": 4.563568216057745e-08, "epoch": 19.65496368038741, "percentage": 98.27, "elapsed_time": "0:44:19", "remaining_time": "0:00:46", "throughput": 2502.47, "total_tokens": 6656048}
3267
+ {"current_steps": 16240, "total_steps": 16520, "loss": 0.1081, "lr": 4.405444166353523e-08, "epoch": 19.661016949152543, "percentage": 98.31, "elapsed_time": "0:44:20", "remaining_time": "0:00:45", "throughput": 2502.5, "total_tokens": 6658096}
3268
+ {"current_steps": 16245, "total_steps": 16520, "loss": 0.1031, "lr": 4.25010565176881e-08, "epoch": 19.667070217917676, "percentage": 98.34, "elapsed_time": "0:44:21", "remaining_time": "0:00:45", "throughput": 2502.51, "total_tokens": 6660080}
3269
+ {"current_steps": 16250, "total_steps": 16520, "loss": 0.1435, "lr": 4.097552845689634e-08, "epoch": 19.673123486682808, "percentage": 98.37, "elapsed_time": "0:44:22", "remaining_time": "0:00:44", "throughput": 2502.54, "total_tokens": 6662032}
3270
+ {"current_steps": 16255, "total_steps": 16520, "loss": 0.147, "lr": 3.9477859183925657e-08, "epoch": 19.67917675544794, "percentage": 98.4, "elapsed_time": "0:44:22", "remaining_time": "0:00:43", "throughput": 2502.6, "total_tokens": 6664240}
3271
+ {"current_steps": 16260, "total_steps": 16520, "loss": 0.046, "lr": 3.8008050370444415e-08, "epoch": 19.685230024213077, "percentage": 98.43, "elapsed_time": "0:44:23", "remaining_time": "0:00:42", "throughput": 2502.59, "total_tokens": 6666256}
3272
+ {"current_steps": 16265, "total_steps": 16520, "loss": 0.1164, "lr": 3.656610365702917e-08, "epoch": 19.69128329297821, "percentage": 98.46, "elapsed_time": "0:44:24", "remaining_time": "0:00:41", "throughput": 2502.61, "total_tokens": 6668432}
3273
+ {"current_steps": 16270, "total_steps": 16520, "loss": 0.0867, "lr": 3.515202065314804e-08, "epoch": 19.69733656174334, "percentage": 98.49, "elapsed_time": "0:44:25", "remaining_time": "0:00:40", "throughput": 2502.63, "total_tokens": 6670416}
3274
+ {"current_steps": 16275, "total_steps": 16520, "loss": 0.1577, "lr": 3.3765802937177346e-08, "epoch": 19.703389830508474, "percentage": 98.52, "elapsed_time": "0:44:26", "remaining_time": "0:00:40", "throughput": 2502.65, "total_tokens": 6672432}
3275
+ {"current_steps": 16280, "total_steps": 16520, "loss": 0.0647, "lr": 3.240745205638773e-08, "epoch": 19.709443099273606, "percentage": 98.55, "elapsed_time": "0:44:26", "remaining_time": "0:00:39", "throughput": 2502.7, "total_tokens": 6674512}
3276
+ {"current_steps": 16285, "total_steps": 16520, "loss": 0.127, "lr": 3.107696952694139e-08, "epoch": 19.715496368038743, "percentage": 98.58, "elapsed_time": "0:44:27", "remaining_time": "0:00:38", "throughput": 2502.72, "total_tokens": 6676592}
3277
+ {"current_steps": 16290, "total_steps": 16520, "loss": 0.1224, "lr": 2.977435683389762e-08, "epoch": 19.721549636803875, "percentage": 98.61, "elapsed_time": "0:44:28", "remaining_time": "0:00:37", "throughput": 2502.76, "total_tokens": 6678832}
3278
+ {"current_steps": 16295, "total_steps": 16520, "loss": 0.0909, "lr": 2.8499615431212824e-08, "epoch": 19.727602905569007, "percentage": 98.64, "elapsed_time": "0:44:29", "remaining_time": "0:00:36", "throughput": 2502.82, "total_tokens": 6680880}
3279
+ {"current_steps": 16300, "total_steps": 16520, "loss": 0.3212, "lr": 2.725274674172107e-08, "epoch": 19.73365617433414, "percentage": 98.67, "elapsed_time": "0:44:30", "remaining_time": "0:00:36", "throughput": 2502.85, "total_tokens": 6683088}
3280
+ {"current_steps": 16305, "total_steps": 16520, "loss": 0.0767, "lr": 2.6033752157161862e-08, "epoch": 19.739709443099272, "percentage": 98.7, "elapsed_time": "0:44:31", "remaining_time": "0:00:35", "throughput": 2502.88, "total_tokens": 6685296}
3281
+ {"current_steps": 16310, "total_steps": 16520, "loss": 0.1128, "lr": 2.4842633038146822e-08, "epoch": 19.74576271186441, "percentage": 98.73, "elapsed_time": "0:44:31", "remaining_time": "0:00:34", "throughput": 2502.92, "total_tokens": 6687312}
3282
+ {"current_steps": 16315, "total_steps": 16520, "loss": 0.1283, "lr": 2.367939071418468e-08, "epoch": 19.75181598062954, "percentage": 98.76, "elapsed_time": "0:44:32", "remaining_time": "0:00:33", "throughput": 2502.96, "total_tokens": 6689264}
3283
+ {"current_steps": 16320, "total_steps": 16520, "loss": 0.0886, "lr": 2.2544026483664606e-08, "epoch": 19.757869249394673, "percentage": 98.79, "elapsed_time": "0:44:33", "remaining_time": "0:00:32", "throughput": 2503.0, "total_tokens": 6691344}
3284
+ {"current_steps": 16325, "total_steps": 16520, "loss": 0.1182, "lr": 2.1436541613853444e-08, "epoch": 19.763922518159806, "percentage": 98.82, "elapsed_time": "0:44:34", "remaining_time": "0:00:31", "throughput": 2503.02, "total_tokens": 6693264}
3285
+ {"current_steps": 16330, "total_steps": 16520, "loss": 0.1969, "lr": 2.03569373409096e-08, "epoch": 19.769975786924938, "percentage": 98.85, "elapsed_time": "0:44:34", "remaining_time": "0:00:31", "throughput": 2503.08, "total_tokens": 6695312}
3286
+ {"current_steps": 16335, "total_steps": 16520, "loss": 0.1343, "lr": 1.930521486986636e-08, "epoch": 19.776029055690074, "percentage": 98.88, "elapsed_time": "0:44:35", "remaining_time": "0:00:30", "throughput": 2503.13, "total_tokens": 6697424}
3287
+ {"current_steps": 16340, "total_steps": 16520, "loss": 0.1264, "lr": 1.8281375374634702e-08, "epoch": 19.782082324455207, "percentage": 98.91, "elapsed_time": "0:44:36", "remaining_time": "0:00:29", "throughput": 2503.16, "total_tokens": 6699632}
3288
+ {"current_steps": 16345, "total_steps": 16520, "loss": 0.0878, "lr": 1.7285419998006035e-08, "epoch": 19.78813559322034, "percentage": 98.94, "elapsed_time": "0:44:37", "remaining_time": "0:00:28", "throughput": 2503.19, "total_tokens": 6701680}
3289
+ {"current_steps": 16350, "total_steps": 16520, "loss": 0.1115, "lr": 1.6317349851646678e-08, "epoch": 19.79418886198547, "percentage": 98.97, "elapsed_time": "0:44:38", "remaining_time": "0:00:27", "throughput": 2503.23, "total_tokens": 6703760}
3290
+ {"current_steps": 16355, "total_steps": 16520, "loss": 0.1209, "lr": 1.5377166016097844e-08, "epoch": 19.800242130750604, "percentage": 99.0, "elapsed_time": "0:44:38", "remaining_time": "0:00:27", "throughput": 2503.3, "total_tokens": 6705840}
3291
+ {"current_steps": 16360, "total_steps": 16520, "loss": 0.0783, "lr": 1.4464869540772863e-08, "epoch": 19.80629539951574, "percentage": 99.03, "elapsed_time": "0:44:39", "remaining_time": "0:00:26", "throughput": 2503.31, "total_tokens": 6707664}
3292
+ {"current_steps": 16365, "total_steps": 16520, "loss": 0.0699, "lr": 1.3580461443962743e-08, "epoch": 19.812348668280872, "percentage": 99.06, "elapsed_time": "0:44:40", "remaining_time": "0:00:25", "throughput": 2503.35, "total_tokens": 6709648}
3293
+ {"current_steps": 16370, "total_steps": 16520, "loss": 0.0688, "lr": 1.2723942712825065e-08, "epoch": 19.818401937046005, "percentage": 99.09, "elapsed_time": "0:44:41", "remaining_time": "0:00:24", "throughput": 2503.41, "total_tokens": 6711664}
3294
+ {"current_steps": 16375, "total_steps": 16520, "loss": 0.0654, "lr": 1.1895314303389526e-08, "epoch": 19.824455205811137, "percentage": 99.12, "elapsed_time": "0:44:41", "remaining_time": "0:00:23", "throughput": 2503.43, "total_tokens": 6713872}
3295
+ {"current_steps": 16380, "total_steps": 16520, "loss": 0.1064, "lr": 1.109457714055795e-08, "epoch": 19.83050847457627, "percentage": 99.15, "elapsed_time": "0:44:42", "remaining_time": "0:00:22", "throughput": 2503.47, "total_tokens": 6716016}
3296
+ {"current_steps": 16385, "total_steps": 16520, "loss": 0.2045, "lr": 1.0321732118095951e-08, "epoch": 19.836561743341406, "percentage": 99.18, "elapsed_time": "0:44:43", "remaining_time": "0:00:22", "throughput": 2503.51, "total_tokens": 6718096}
3297
+ {"current_steps": 16390, "total_steps": 16520, "loss": 0.0923, "lr": 9.576780098638494e-09, "epoch": 19.84261501210654, "percentage": 99.21, "elapsed_time": "0:44:44", "remaining_time": "0:00:21", "throughput": 2503.55, "total_tokens": 6720144}
3298
+ {"current_steps": 16395, "total_steps": 16520, "loss": 0.0686, "lr": 8.859721913684339e-09, "epoch": 19.84866828087167, "percentage": 99.24, "elapsed_time": "0:44:45", "remaining_time": "0:00:20", "throughput": 2503.58, "total_tokens": 6722192}
3299
+ {"current_steps": 16400, "total_steps": 16520, "loss": 0.1682, "lr": 8.170558363607139e-09, "epoch": 19.854721549636803, "percentage": 99.27, "elapsed_time": "0:44:45", "remaining_time": "0:00:19", "throughput": 2503.6, "total_tokens": 6724272}
3300
+ {"current_steps": 16405, "total_steps": 16520, "loss": 0.1251, "lr": 7.50929021763047e-09, "epoch": 19.860774818401936, "percentage": 99.3, "elapsed_time": "0:44:46", "remaining_time": "0:00:18", "throughput": 2503.64, "total_tokens": 6726256}
3301
+ {"current_steps": 16410, "total_steps": 16520, "loss": 0.1708, "lr": 6.8759182138528055e-09, "epoch": 19.86682808716707, "percentage": 99.33, "elapsed_time": "0:44:47", "remaining_time": "0:00:18", "throughput": 2503.66, "total_tokens": 6728272}
3302
+ {"current_steps": 16415, "total_steps": 16520, "loss": 0.1333, "lr": 6.2704430592336326e-09, "epoch": 19.872881355932204, "percentage": 99.36, "elapsed_time": "0:44:48", "remaining_time": "0:00:17", "throughput": 2503.66, "total_tokens": 6730288}
3303
+ {"current_steps": 16420, "total_steps": 16520, "loss": 0.1167, "lr": 5.692865429590688e-09, "epoch": 19.878934624697337, "percentage": 99.39, "elapsed_time": "0:44:48", "remaining_time": "0:00:16", "throughput": 2503.7, "total_tokens": 6732368}
3304
+ {"current_steps": 16425, "total_steps": 16520, "loss": 0.1354, "lr": 5.143185969602726e-09, "epoch": 19.88498789346247, "percentage": 99.42, "elapsed_time": "0:44:49", "remaining_time": "0:00:15", "throughput": 2503.71, "total_tokens": 6734416}
3305
+ {"current_steps": 16430, "total_steps": 16520, "loss": 0.0694, "lr": 4.6214052928150734e-09, "epoch": 19.8910411622276, "percentage": 99.46, "elapsed_time": "0:44:50", "remaining_time": "0:00:14", "throughput": 2503.75, "total_tokens": 6736496}
3306
+ {"current_steps": 16435, "total_steps": 16520, "loss": 0.0411, "lr": 4.127523981631298e-09, "epoch": 19.897094430992738, "percentage": 99.49, "elapsed_time": "0:44:51", "remaining_time": "0:00:13", "throughput": 2503.81, "total_tokens": 6738448}
3307
+ {"current_steps": 16440, "total_steps": 16520, "loss": 0.1084, "lr": 3.661542587304889e-09, "epoch": 19.90314769975787, "percentage": 99.52, "elapsed_time": "0:44:52", "remaining_time": "0:00:13", "throughput": 2503.82, "total_tokens": 6740496}
3308
+ {"current_steps": 16445, "total_steps": 16520, "loss": 0.1617, "lr": 3.2234616299642306e-09, "epoch": 19.909200968523002, "percentage": 99.55, "elapsed_time": "0:44:52", "remaining_time": "0:00:12", "throughput": 2503.88, "total_tokens": 6742736}
3309
+ {"current_steps": 16450, "total_steps": 16520, "loss": 0.0769, "lr": 2.813281598579298e-09, "epoch": 19.915254237288135, "percentage": 99.58, "elapsed_time": "0:44:53", "remaining_time": "0:00:11", "throughput": 2503.95, "total_tokens": 6744688}
3310
+ {"current_steps": 16455, "total_steps": 16520, "loss": 0.0694, "lr": 2.431002950989414e-09, "epoch": 19.921307506053267, "percentage": 99.61, "elapsed_time": "0:44:54", "remaining_time": "0:00:10", "throughput": 2503.99, "total_tokens": 6746672}
3311
+ {"current_steps": 16460, "total_steps": 16520, "loss": 0.0873, "lr": 2.076626113886593e-09, "epoch": 19.927360774818403, "percentage": 99.64, "elapsed_time": "0:44:55", "remaining_time": "0:00:09", "throughput": 2504.02, "total_tokens": 6748784}
3312
+ {"current_steps": 16465, "total_steps": 16520, "loss": 0.1012, "lr": 1.7501514828183185e-09, "epoch": 19.933414043583536, "percentage": 99.67, "elapsed_time": "0:44:55", "remaining_time": "0:00:09", "throughput": 2504.05, "total_tokens": 6750896}
3313
+ {"current_steps": 16470, "total_steps": 16520, "loss": 0.083, "lr": 1.4515794221875434e-09, "epoch": 19.93946731234867, "percentage": 99.7, "elapsed_time": "0:44:56", "remaining_time": "0:00:08", "throughput": 2504.06, "total_tokens": 6752880}
3314
+ {"current_steps": 16475, "total_steps": 16520, "loss": 0.0252, "lr": 1.1809102652610148e-09, "epoch": 19.9455205811138, "percentage": 99.73, "elapsed_time": "0:44:57", "remaining_time": "0:00:07", "throughput": 2504.09, "total_tokens": 6754832}
3315
+ {"current_steps": 16480, "total_steps": 16520, "loss": 0.07, "lr": 9.381443141470714e-10, "epoch": 19.951573849878933, "percentage": 99.76, "elapsed_time": "0:44:58", "remaining_time": "0:00:06", "throughput": 2504.17, "total_tokens": 6757104}
3316
+ {"current_steps": 16485, "total_steps": 16520, "loss": 0.0529, "lr": 7.23281839820622e-10, "epoch": 19.95762711864407, "percentage": 99.79, "elapsed_time": "0:44:59", "remaining_time": "0:00:05", "throughput": 2504.21, "total_tokens": 6759088}
3317
+ {"current_steps": 16490, "total_steps": 16520, "loss": 0.1578, "lr": 5.363230821064935e-10, "epoch": 19.9636803874092, "percentage": 99.82, "elapsed_time": "0:44:59", "remaining_time": "0:00:04", "throughput": 2504.28, "total_tokens": 6761072}
3318
+ {"current_steps": 16495, "total_steps": 16520, "loss": 0.1025, "lr": 3.772682496849811e-10, "epoch": 19.969733656174334, "percentage": 99.85, "elapsed_time": "0:45:00", "remaining_time": "0:00:04", "throughput": 2504.33, "total_tokens": 6763120}
3319
+ {"current_steps": 16500, "total_steps": 16520, "loss": 0.1318, "lr": 2.4611752008907307e-10, "epoch": 19.975786924939467, "percentage": 99.88, "elapsed_time": "0:45:01", "remaining_time": "0:00:03", "throughput": 2504.36, "total_tokens": 6765136}
3320
+ {"current_steps": 16505, "total_steps": 16520, "loss": 0.1155, "lr": 1.4287103970722638e-10, "epoch": 19.9818401937046, "percentage": 99.91, "elapsed_time": "0:45:02", "remaining_time": "0:00:02", "throughput": 2504.38, "total_tokens": 6767248}
3321
+ {"current_steps": 16510, "total_steps": 16520, "loss": 0.0996, "lr": 6.752892378059095e-11, "epoch": 19.987893462469735, "percentage": 99.94, "elapsed_time": "0:45:02", "remaining_time": "0:00:01", "throughput": 2504.42, "total_tokens": 6769328}
3322
+ {"current_steps": 16515, "total_steps": 16520, "loss": 0.0904, "lr": 2.0091256403009794e-11, "epoch": 19.993946731234868, "percentage": 99.97, "elapsed_time": "0:45:03", "remaining_time": "0:00:00", "throughput": 2504.47, "total_tokens": 6771472}
3323
+ {"current_steps": 16520, "total_steps": 16520, "loss": 0.0791, "lr": 5.58090529345634e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:45:04", "remaining_time": "0:00:00", "throughput": 2504.35, "total_tokens": 6773216}
3324
+ {"current_steps": 16520, "total_steps": 16520, "eval_loss": 0.14046552777290344, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:45:09", "remaining_time": "0:00:00", "throughput": 2499.98, "total_tokens": 6773216}
3325
+ {"current_steps": 16520, "total_steps": 16520, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:45:10", "remaining_time": "0:00:00", "throughput": 2499.25, "total_tokens": 6773216}