rbelanec commited on
Commit
622da26
·
verified ·
1 Parent(s): 7410437

Training in progress, step 32708

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +385 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d81aa2d444ff9bb56d535ad6813247381f32318c687c9e8445ce342cebfb6c96
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849d41327d28828dce8ca4960854b0eb391d9a9ff7b731788ac650e5ca2ac1f5
3
  size 798032
trainer_log.jsonl CHANGED
@@ -6174,3 +6174,388 @@
6174
  {"current_steps": 30790, "total_steps": 38480, "loss": 0.0642, "lr": 5.841602196652241e-06, "epoch": 16.003118503118504, "percentage": 80.02, "elapsed_time": "1:14:12", "remaining_time": "0:18:31", "throughput": 1318.31, "total_tokens": 5869136}
6175
  {"current_steps": 30795, "total_steps": 38480, "loss": 0.1117, "lr": 5.834319412617911e-06, "epoch": 16.005717255717254, "percentage": 80.03, "elapsed_time": "1:14:12", "remaining_time": "0:18:31", "throughput": 1318.3, "total_tokens": 5870032}
6176
  {"current_steps": 30800, "total_steps": 38480, "loss": 0.0992, "lr": 5.827040571420792e-06, "epoch": 16.008316008316008, "percentage": 80.04, "elapsed_time": "1:14:13", "remaining_time": "0:18:30", "throughput": 1318.29, "total_tokens": 5870928}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6174
  {"current_steps": 30790, "total_steps": 38480, "loss": 0.0642, "lr": 5.841602196652241e-06, "epoch": 16.003118503118504, "percentage": 80.02, "elapsed_time": "1:14:12", "remaining_time": "0:18:31", "throughput": 1318.31, "total_tokens": 5869136}
6175
  {"current_steps": 30795, "total_steps": 38480, "loss": 0.1117, "lr": 5.834319412617911e-06, "epoch": 16.005717255717254, "percentage": 80.03, "elapsed_time": "1:14:12", "remaining_time": "0:18:31", "throughput": 1318.3, "total_tokens": 5870032}
6176
  {"current_steps": 30800, "total_steps": 38480, "loss": 0.0992, "lr": 5.827040571420792e-06, "epoch": 16.008316008316008, "percentage": 80.04, "elapsed_time": "1:14:13", "remaining_time": "0:18:30", "throughput": 1318.29, "total_tokens": 5870928}
6177
+ {"current_steps": 30805, "total_steps": 38480, "loss": 0.0675, "lr": 5.819765674558322e-06, "epoch": 16.010914760914762, "percentage": 80.05, "elapsed_time": "1:14:14", "remaining_time": "0:18:29", "throughput": 1318.3, "total_tokens": 5871920}
6178
+ {"current_steps": 30810, "total_steps": 38480, "loss": 0.1169, "lr": 5.812494723527106e-06, "epoch": 16.013513513513512, "percentage": 80.07, "elapsed_time": "1:14:14", "remaining_time": "0:18:29", "throughput": 1318.31, "total_tokens": 5872880}
6179
+ {"current_steps": 30815, "total_steps": 38480, "loss": 0.1365, "lr": 5.805227719822981e-06, "epoch": 16.016112266112266, "percentage": 80.08, "elapsed_time": "1:14:15", "remaining_time": "0:18:28", "throughput": 1318.31, "total_tokens": 5873840}
6180
+ {"current_steps": 30820, "total_steps": 38480, "loss": 0.1738, "lr": 5.797964664940916e-06, "epoch": 16.01871101871102, "percentage": 80.09, "elapsed_time": "1:14:16", "remaining_time": "0:18:27", "throughput": 1318.34, "total_tokens": 5874896}
6181
+ {"current_steps": 30825, "total_steps": 38480, "loss": 0.1158, "lr": 5.7907055603750985e-06, "epoch": 16.02130977130977, "percentage": 80.11, "elapsed_time": "1:14:16", "remaining_time": "0:18:26", "throughput": 1318.35, "total_tokens": 5875856}
6182
+ {"current_steps": 30830, "total_steps": 38480, "loss": 0.1672, "lr": 5.783450407618907e-06, "epoch": 16.023908523908524, "percentage": 80.12, "elapsed_time": "1:14:17", "remaining_time": "0:18:26", "throughput": 1318.35, "total_tokens": 5876784}
6183
+ {"current_steps": 30835, "total_steps": 38480, "loss": 0.0953, "lr": 5.776199208164901e-06, "epoch": 16.026507276507278, "percentage": 80.13, "elapsed_time": "1:14:18", "remaining_time": "0:18:25", "throughput": 1318.35, "total_tokens": 5877712}
6184
+ {"current_steps": 30840, "total_steps": 38480, "loss": 0.1479, "lr": 5.768951963504818e-06, "epoch": 16.02910602910603, "percentage": 80.15, "elapsed_time": "1:14:19", "remaining_time": "0:18:24", "throughput": 1318.35, "total_tokens": 5878640}
6185
+ {"current_steps": 30845, "total_steps": 38480, "loss": 0.1151, "lr": 5.761708675129585e-06, "epoch": 16.031704781704782, "percentage": 80.16, "elapsed_time": "1:14:19", "remaining_time": "0:18:23", "throughput": 1318.36, "total_tokens": 5879600}
6186
+ {"current_steps": 30850, "total_steps": 38480, "loss": 0.1228, "lr": 5.7544693445293315e-06, "epoch": 16.034303534303536, "percentage": 80.17, "elapsed_time": "1:14:20", "remaining_time": "0:18:23", "throughput": 1318.37, "total_tokens": 5880592}
6187
+ {"current_steps": 30855, "total_steps": 38480, "loss": 0.1197, "lr": 5.747233973193353e-06, "epoch": 16.036902286902286, "percentage": 80.18, "elapsed_time": "1:14:21", "remaining_time": "0:18:22", "throughput": 1318.37, "total_tokens": 5881488}
6188
+ {"current_steps": 30860, "total_steps": 38480, "loss": 0.1011, "lr": 5.740002562610141e-06, "epoch": 16.03950103950104, "percentage": 80.2, "elapsed_time": "1:14:21", "remaining_time": "0:18:21", "throughput": 1318.37, "total_tokens": 5882416}
6189
+ {"current_steps": 30865, "total_steps": 38480, "loss": 0.1755, "lr": 5.7327751142673606e-06, "epoch": 16.04209979209979, "percentage": 80.21, "elapsed_time": "1:14:22", "remaining_time": "0:18:21", "throughput": 1318.36, "total_tokens": 5883312}
6190
+ {"current_steps": 30870, "total_steps": 38480, "loss": 0.1277, "lr": 5.725551629651882e-06, "epoch": 16.044698544698544, "percentage": 80.22, "elapsed_time": "1:14:23", "remaining_time": "0:18:20", "throughput": 1318.35, "total_tokens": 5884208}
6191
+ {"current_steps": 30875, "total_steps": 38480, "loss": 0.0563, "lr": 5.718332110249744e-06, "epoch": 16.0472972972973, "percentage": 80.24, "elapsed_time": "1:14:24", "remaining_time": "0:18:19", "throughput": 1318.33, "total_tokens": 5885040}
6192
+ {"current_steps": 30880, "total_steps": 38480, "loss": 0.142, "lr": 5.711116557546173e-06, "epoch": 16.04989604989605, "percentage": 80.25, "elapsed_time": "1:14:24", "remaining_time": "0:18:18", "throughput": 1318.34, "total_tokens": 5886000}
6193
+ {"current_steps": 30885, "total_steps": 38480, "loss": 0.101, "lr": 5.7039049730255786e-06, "epoch": 16.052494802494802, "percentage": 80.26, "elapsed_time": "1:14:25", "remaining_time": "0:18:18", "throughput": 1318.34, "total_tokens": 5886928}
6194
+ {"current_steps": 30890, "total_steps": 38480, "loss": 0.048, "lr": 5.696697358171557e-06, "epoch": 16.055093555093556, "percentage": 80.28, "elapsed_time": "1:14:26", "remaining_time": "0:18:17", "throughput": 1318.32, "total_tokens": 5887760}
6195
+ {"current_steps": 30895, "total_steps": 38480, "loss": 0.0709, "lr": 5.689493714466879e-06, "epoch": 16.057692307692307, "percentage": 80.29, "elapsed_time": "1:14:26", "remaining_time": "0:18:16", "throughput": 1318.34, "total_tokens": 5888784}
6196
+ {"current_steps": 30900, "total_steps": 38480, "loss": 0.0612, "lr": 5.682294043393518e-06, "epoch": 16.06029106029106, "percentage": 80.3, "elapsed_time": "1:14:27", "remaining_time": "0:18:15", "throughput": 1318.33, "total_tokens": 5889680}
6197
+ {"current_steps": 30905, "total_steps": 38480, "loss": 0.0889, "lr": 5.675098346432614e-06, "epoch": 16.062889812889814, "percentage": 80.31, "elapsed_time": "1:14:28", "remaining_time": "0:18:15", "throughput": 1318.34, "total_tokens": 5890640}
6198
+ {"current_steps": 30910, "total_steps": 38480, "loss": 0.0685, "lr": 5.6679066250644924e-06, "epoch": 16.065488565488565, "percentage": 80.33, "elapsed_time": "1:14:28", "remaining_time": "0:18:14", "throughput": 1318.34, "total_tokens": 5891568}
6199
+ {"current_steps": 30915, "total_steps": 38480, "loss": 0.0611, "lr": 5.660718880768653e-06, "epoch": 16.06808731808732, "percentage": 80.34, "elapsed_time": "1:14:29", "remaining_time": "0:18:13", "throughput": 1318.35, "total_tokens": 5892528}
6200
+ {"current_steps": 30920, "total_steps": 38480, "loss": 0.089, "lr": 5.653535115023803e-06, "epoch": 16.070686070686072, "percentage": 80.35, "elapsed_time": "1:14:30", "remaining_time": "0:18:13", "throughput": 1318.34, "total_tokens": 5893424}
6201
+ {"current_steps": 30925, "total_steps": 38480, "loss": 0.1713, "lr": 5.646355329307807e-06, "epoch": 16.073284823284823, "percentage": 80.37, "elapsed_time": "1:14:31", "remaining_time": "0:18:12", "throughput": 1318.34, "total_tokens": 5894352}
6202
+ {"current_steps": 30930, "total_steps": 38480, "loss": 0.1842, "lr": 5.639179525097712e-06, "epoch": 16.075883575883577, "percentage": 80.38, "elapsed_time": "1:14:31", "remaining_time": "0:18:11", "throughput": 1318.33, "total_tokens": 5895248}
6203
+ {"current_steps": 30935, "total_steps": 38480, "loss": 0.0951, "lr": 5.632007703869751e-06, "epoch": 16.078482328482327, "percentage": 80.39, "elapsed_time": "1:14:32", "remaining_time": "0:18:10", "throughput": 1318.34, "total_tokens": 5896208}
6204
+ {"current_steps": 30940, "total_steps": 38480, "loss": 0.0652, "lr": 5.624839867099352e-06, "epoch": 16.08108108108108, "percentage": 80.41, "elapsed_time": "1:14:33", "remaining_time": "0:18:10", "throughput": 1318.33, "total_tokens": 5897104}
6205
+ {"current_steps": 30945, "total_steps": 38480, "loss": 0.0797, "lr": 5.617676016261109e-06, "epoch": 16.083679833679835, "percentage": 80.42, "elapsed_time": "1:14:33", "remaining_time": "0:18:09", "throughput": 1318.34, "total_tokens": 5898064}
6206
+ {"current_steps": 30950, "total_steps": 38480, "loss": 0.09, "lr": 5.610516152828776e-06, "epoch": 16.086278586278585, "percentage": 80.43, "elapsed_time": "1:14:34", "remaining_time": "0:18:08", "throughput": 1318.35, "total_tokens": 5899056}
6207
+ {"current_steps": 30955, "total_steps": 38480, "loss": 0.0993, "lr": 5.603360278275327e-06, "epoch": 16.08887733887734, "percentage": 80.44, "elapsed_time": "1:14:35", "remaining_time": "0:18:07", "throughput": 1318.36, "total_tokens": 5899984}
6208
+ {"current_steps": 30960, "total_steps": 38480, "loss": 0.1109, "lr": 5.596208394072891e-06, "epoch": 16.091476091476093, "percentage": 80.46, "elapsed_time": "1:14:35", "remaining_time": "0:18:07", "throughput": 1318.36, "total_tokens": 5900944}
6209
+ {"current_steps": 30965, "total_steps": 38480, "loss": 0.2016, "lr": 5.589060501692777e-06, "epoch": 16.094074844074843, "percentage": 80.47, "elapsed_time": "1:14:36", "remaining_time": "0:18:06", "throughput": 1318.35, "total_tokens": 5901840}
6210
+ {"current_steps": 30970, "total_steps": 38480, "loss": 0.106, "lr": 5.5819166026054775e-06, "epoch": 16.096673596673597, "percentage": 80.48, "elapsed_time": "1:14:37", "remaining_time": "0:18:05", "throughput": 1318.38, "total_tokens": 5902864}
6211
+ {"current_steps": 30975, "total_steps": 38480, "loss": 0.0705, "lr": 5.574776698280671e-06, "epoch": 16.09927234927235, "percentage": 80.5, "elapsed_time": "1:14:38", "remaining_time": "0:18:05", "throughput": 1318.38, "total_tokens": 5903824}
6212
+ {"current_steps": 30980, "total_steps": 38480, "loss": 0.1386, "lr": 5.567640790187198e-06, "epoch": 16.1018711018711, "percentage": 80.51, "elapsed_time": "1:14:38", "remaining_time": "0:18:04", "throughput": 1318.41, "total_tokens": 5904848}
6213
+ {"current_steps": 30985, "total_steps": 38480, "loss": 0.1007, "lr": 5.560508879793089e-06, "epoch": 16.104469854469855, "percentage": 80.52, "elapsed_time": "1:14:39", "remaining_time": "0:18:03", "throughput": 1318.41, "total_tokens": 5905808}
6214
+ {"current_steps": 30990, "total_steps": 38480, "loss": 0.1482, "lr": 5.553380968565539e-06, "epoch": 16.10706860706861, "percentage": 80.54, "elapsed_time": "1:14:40", "remaining_time": "0:18:02", "throughput": 1318.41, "total_tokens": 5906704}
6215
+ {"current_steps": 30995, "total_steps": 38480, "loss": 0.164, "lr": 5.546257057970941e-06, "epoch": 16.10966735966736, "percentage": 80.55, "elapsed_time": "1:14:40", "remaining_time": "0:18:02", "throughput": 1318.41, "total_tokens": 5907664}
6216
+ {"current_steps": 31000, "total_steps": 38480, "loss": 0.167, "lr": 5.539137149474849e-06, "epoch": 16.112266112266113, "percentage": 80.56, "elapsed_time": "1:14:41", "remaining_time": "0:18:01", "throughput": 1318.42, "total_tokens": 5908592}
6217
+ {"current_steps": 31005, "total_steps": 38480, "loss": 0.1684, "lr": 5.53202124454199e-06, "epoch": 16.114864864864863, "percentage": 80.57, "elapsed_time": "1:14:42", "remaining_time": "0:18:00", "throughput": 1318.42, "total_tokens": 5909520}
6218
+ {"current_steps": 31010, "total_steps": 38480, "loss": 0.0459, "lr": 5.524909344636297e-06, "epoch": 16.117463617463617, "percentage": 80.59, "elapsed_time": "1:14:42", "remaining_time": "0:17:59", "throughput": 1318.43, "total_tokens": 5910512}
6219
+ {"current_steps": 31015, "total_steps": 38480, "loss": 0.0977, "lr": 5.517801451220836e-06, "epoch": 16.12006237006237, "percentage": 80.6, "elapsed_time": "1:14:43", "remaining_time": "0:17:59", "throughput": 1318.44, "total_tokens": 5911504}
6220
+ {"current_steps": 31020, "total_steps": 38480, "loss": 0.2153, "lr": 5.510697565757869e-06, "epoch": 16.12266112266112, "percentage": 80.61, "elapsed_time": "1:14:44", "remaining_time": "0:17:58", "throughput": 1318.46, "total_tokens": 5912496}
6221
+ {"current_steps": 31025, "total_steps": 38480, "loss": 0.1256, "lr": 5.50359768970885e-06, "epoch": 16.125259875259875, "percentage": 80.63, "elapsed_time": "1:14:45", "remaining_time": "0:17:57", "throughput": 1318.48, "total_tokens": 5913488}
6222
+ {"current_steps": 31030, "total_steps": 38480, "loss": 0.1299, "lr": 5.496501824534386e-06, "epoch": 16.12785862785863, "percentage": 80.64, "elapsed_time": "1:14:45", "remaining_time": "0:17:56", "throughput": 1318.48, "total_tokens": 5914448}
6223
+ {"current_steps": 31035, "total_steps": 38480, "loss": 0.1028, "lr": 5.489409971694262e-06, "epoch": 16.13045738045738, "percentage": 80.65, "elapsed_time": "1:14:46", "remaining_time": "0:17:56", "throughput": 1318.5, "total_tokens": 5915440}
6224
+ {"current_steps": 31040, "total_steps": 38480, "loss": 0.0944, "lr": 5.482322132647438e-06, "epoch": 16.133056133056133, "percentage": 80.67, "elapsed_time": "1:14:47", "remaining_time": "0:17:55", "throughput": 1318.5, "total_tokens": 5916368}
6225
+ {"current_steps": 31045, "total_steps": 38480, "loss": 0.141, "lr": 5.475238308852063e-06, "epoch": 16.135654885654887, "percentage": 80.68, "elapsed_time": "1:14:47", "remaining_time": "0:17:54", "throughput": 1318.51, "total_tokens": 5917328}
6226
+ {"current_steps": 31050, "total_steps": 38480, "loss": 0.1064, "lr": 5.468158501765444e-06, "epoch": 16.138253638253637, "percentage": 80.69, "elapsed_time": "1:14:48", "remaining_time": "0:17:54", "throughput": 1318.52, "total_tokens": 5918320}
6227
+ {"current_steps": 31055, "total_steps": 38480, "loss": 0.0742, "lr": 5.4610827128440625e-06, "epoch": 16.14085239085239, "percentage": 80.7, "elapsed_time": "1:14:49", "remaining_time": "0:17:53", "throughput": 1318.53, "total_tokens": 5919280}
6228
+ {"current_steps": 31060, "total_steps": 38480, "loss": 0.0906, "lr": 5.454010943543572e-06, "epoch": 16.143451143451145, "percentage": 80.72, "elapsed_time": "1:14:49", "remaining_time": "0:17:52", "throughput": 1318.53, "total_tokens": 5920176}
6229
+ {"current_steps": 31065, "total_steps": 38480, "loss": 0.118, "lr": 5.4469431953188185e-06, "epoch": 16.146049896049895, "percentage": 80.73, "elapsed_time": "1:14:50", "remaining_time": "0:17:51", "throughput": 1318.52, "total_tokens": 5921072}
6230
+ {"current_steps": 31070, "total_steps": 38480, "loss": 0.089, "lr": 5.4398794696237984e-06, "epoch": 16.14864864864865, "percentage": 80.74, "elapsed_time": "1:14:51", "remaining_time": "0:17:51", "throughput": 1318.54, "total_tokens": 5922064}
6231
+ {"current_steps": 31075, "total_steps": 38480, "loss": 0.0869, "lr": 5.43281976791169e-06, "epoch": 16.151247401247403, "percentage": 80.76, "elapsed_time": "1:14:52", "remaining_time": "0:17:50", "throughput": 1318.54, "total_tokens": 5923024}
6232
+ {"current_steps": 31080, "total_steps": 38480, "loss": 0.0635, "lr": 5.4257640916348386e-06, "epoch": 16.153846153846153, "percentage": 80.77, "elapsed_time": "1:14:52", "remaining_time": "0:17:49", "throughput": 1318.54, "total_tokens": 5923952}
6233
+ {"current_steps": 31085, "total_steps": 38480, "loss": 0.0948, "lr": 5.418712442244767e-06, "epoch": 16.156444906444907, "percentage": 80.78, "elapsed_time": "1:14:53", "remaining_time": "0:17:48", "throughput": 1318.56, "total_tokens": 5924944}
6234
+ {"current_steps": 31090, "total_steps": 38480, "loss": 0.2837, "lr": 5.411664821192169e-06, "epoch": 16.159043659043657, "percentage": 80.8, "elapsed_time": "1:14:54", "remaining_time": "0:17:48", "throughput": 1318.56, "total_tokens": 5925872}
6235
+ {"current_steps": 31095, "total_steps": 38480, "loss": 0.1327, "lr": 5.4046212299268985e-06, "epoch": 16.16164241164241, "percentage": 80.81, "elapsed_time": "1:14:54", "remaining_time": "0:17:47", "throughput": 1318.57, "total_tokens": 5926832}
6236
+ {"current_steps": 31100, "total_steps": 38480, "loss": 0.1537, "lr": 5.397581669898005e-06, "epoch": 16.164241164241165, "percentage": 80.82, "elapsed_time": "1:14:55", "remaining_time": "0:17:46", "throughput": 1318.58, "total_tokens": 5927792}
6237
+ {"current_steps": 31105, "total_steps": 38480, "loss": 0.1607, "lr": 5.390546142553687e-06, "epoch": 16.166839916839916, "percentage": 80.83, "elapsed_time": "1:14:56", "remaining_time": "0:17:46", "throughput": 1318.58, "total_tokens": 5928752}
6238
+ {"current_steps": 31110, "total_steps": 38480, "loss": 0.0445, "lr": 5.38351464934132e-06, "epoch": 16.16943866943867, "percentage": 80.85, "elapsed_time": "1:14:57", "remaining_time": "0:17:45", "throughput": 1318.57, "total_tokens": 5929616}
6239
+ {"current_steps": 31115, "total_steps": 38480, "loss": 0.1523, "lr": 5.3764871917074435e-06, "epoch": 16.172037422037423, "percentage": 80.86, "elapsed_time": "1:14:57", "remaining_time": "0:17:44", "throughput": 1318.59, "total_tokens": 5930640}
6240
+ {"current_steps": 31120, "total_steps": 38480, "loss": 0.1788, "lr": 5.369463771097788e-06, "epoch": 16.174636174636174, "percentage": 80.87, "elapsed_time": "1:14:58", "remaining_time": "0:17:43", "throughput": 1318.61, "total_tokens": 5931632}
6241
+ {"current_steps": 31125, "total_steps": 38480, "loss": 0.075, "lr": 5.362444388957227e-06, "epoch": 16.177234927234927, "percentage": 80.89, "elapsed_time": "1:14:59", "remaining_time": "0:17:43", "throughput": 1318.62, "total_tokens": 5932592}
6242
+ {"current_steps": 31130, "total_steps": 38480, "loss": 0.1173, "lr": 5.355429046729812e-06, "epoch": 16.17983367983368, "percentage": 80.9, "elapsed_time": "1:14:59", "remaining_time": "0:17:42", "throughput": 1318.63, "total_tokens": 5933552}
6243
+ {"current_steps": 31135, "total_steps": 38480, "loss": 0.1838, "lr": 5.34841774585878e-06, "epoch": 16.18243243243243, "percentage": 80.91, "elapsed_time": "1:15:00", "remaining_time": "0:17:41", "throughput": 1318.64, "total_tokens": 5934512}
6244
+ {"current_steps": 31140, "total_steps": 38480, "loss": 0.0616, "lr": 5.341410487786519e-06, "epoch": 16.185031185031185, "percentage": 80.93, "elapsed_time": "1:15:01", "remaining_time": "0:17:40", "throughput": 1318.64, "total_tokens": 5935440}
6245
+ {"current_steps": 31145, "total_steps": 38480, "loss": 0.1106, "lr": 5.334407273954573e-06, "epoch": 16.18762993762994, "percentage": 80.94, "elapsed_time": "1:15:01", "remaining_time": "0:17:40", "throughput": 1318.64, "total_tokens": 5936368}
6246
+ {"current_steps": 31150, "total_steps": 38480, "loss": 0.1325, "lr": 5.327408105803685e-06, "epoch": 16.19022869022869, "percentage": 80.95, "elapsed_time": "1:15:02", "remaining_time": "0:17:39", "throughput": 1318.66, "total_tokens": 5937360}
6247
+ {"current_steps": 31155, "total_steps": 38480, "loss": 0.1677, "lr": 5.320412984773748e-06, "epoch": 16.192827442827443, "percentage": 80.96, "elapsed_time": "1:15:03", "remaining_time": "0:17:38", "throughput": 1318.66, "total_tokens": 5938320}
6248
+ {"current_steps": 31160, "total_steps": 38480, "loss": 0.0807, "lr": 5.313421912303826e-06, "epoch": 16.195426195426194, "percentage": 80.98, "elapsed_time": "1:15:03", "remaining_time": "0:17:38", "throughput": 1318.69, "total_tokens": 5939344}
6249
+ {"current_steps": 31165, "total_steps": 38480, "loss": 0.1288, "lr": 5.306434889832137e-06, "epoch": 16.198024948024948, "percentage": 80.99, "elapsed_time": "1:15:04", "remaining_time": "0:17:37", "throughput": 1318.69, "total_tokens": 5940272}
6250
+ {"current_steps": 31170, "total_steps": 38480, "loss": 0.1414, "lr": 5.299451918796098e-06, "epoch": 16.2006237006237, "percentage": 81.0, "elapsed_time": "1:15:05", "remaining_time": "0:17:36", "throughput": 1318.7, "total_tokens": 5941232}
6251
+ {"current_steps": 31175, "total_steps": 38480, "loss": 0.1416, "lr": 5.292473000632262e-06, "epoch": 16.203222453222452, "percentage": 81.02, "elapsed_time": "1:15:06", "remaining_time": "0:17:35", "throughput": 1318.7, "total_tokens": 5942160}
6252
+ {"current_steps": 31180, "total_steps": 38480, "loss": 0.1054, "lr": 5.285498136776357e-06, "epoch": 16.205821205821206, "percentage": 81.03, "elapsed_time": "1:15:06", "remaining_time": "0:17:35", "throughput": 1318.71, "total_tokens": 5943120}
6253
+ {"current_steps": 31185, "total_steps": 38480, "loss": 0.2564, "lr": 5.278527328663277e-06, "epoch": 16.20841995841996, "percentage": 81.04, "elapsed_time": "1:15:07", "remaining_time": "0:17:34", "throughput": 1318.74, "total_tokens": 5944176}
6254
+ {"current_steps": 31190, "total_steps": 38480, "loss": 0.0862, "lr": 5.271560577727094e-06, "epoch": 16.21101871101871, "percentage": 81.06, "elapsed_time": "1:15:08", "remaining_time": "0:17:33", "throughput": 1318.73, "total_tokens": 5945072}
6255
+ {"current_steps": 31195, "total_steps": 38480, "loss": 0.0753, "lr": 5.264597885401029e-06, "epoch": 16.213617463617464, "percentage": 81.07, "elapsed_time": "1:15:08", "remaining_time": "0:17:32", "throughput": 1318.77, "total_tokens": 5946160}
6256
+ {"current_steps": 31200, "total_steps": 38480, "loss": 0.1401, "lr": 5.257639253117472e-06, "epoch": 16.216216216216218, "percentage": 81.08, "elapsed_time": "1:15:09", "remaining_time": "0:17:32", "throughput": 1318.76, "total_tokens": 5947056}
6257
+ {"current_steps": 31205, "total_steps": 38480, "loss": 0.1292, "lr": 5.25068468230798e-06, "epoch": 16.218814968814968, "percentage": 81.09, "elapsed_time": "1:15:10", "remaining_time": "0:17:31", "throughput": 1318.78, "total_tokens": 5948048}
6258
+ {"current_steps": 31210, "total_steps": 38480, "loss": 0.093, "lr": 5.243734174403275e-06, "epoch": 16.22141372141372, "percentage": 81.11, "elapsed_time": "1:15:10", "remaining_time": "0:17:30", "throughput": 1318.79, "total_tokens": 5949040}
6259
+ {"current_steps": 31215, "total_steps": 38480, "loss": 0.2775, "lr": 5.236787730833242e-06, "epoch": 16.224012474012476, "percentage": 81.12, "elapsed_time": "1:15:11", "remaining_time": "0:17:30", "throughput": 1318.81, "total_tokens": 5950032}
6260
+ {"current_steps": 31220, "total_steps": 38480, "loss": 0.137, "lr": 5.229845353026921e-06, "epoch": 16.226611226611226, "percentage": 81.13, "elapsed_time": "1:15:12", "remaining_time": "0:17:29", "throughput": 1318.82, "total_tokens": 5951024}
6261
+ {"current_steps": 31225, "total_steps": 38480, "loss": 0.0503, "lr": 5.222907042412542e-06, "epoch": 16.22920997920998, "percentage": 81.15, "elapsed_time": "1:15:13", "remaining_time": "0:17:28", "throughput": 1318.83, "total_tokens": 5951984}
6262
+ {"current_steps": 31230, "total_steps": 38480, "loss": 0.1135, "lr": 5.2159728004174705e-06, "epoch": 16.23180873180873, "percentage": 81.16, "elapsed_time": "1:15:13", "remaining_time": "0:17:27", "throughput": 1318.85, "total_tokens": 5952976}
6263
+ {"current_steps": 31235, "total_steps": 38480, "loss": 0.092, "lr": 5.209042628468247e-06, "epoch": 16.234407484407484, "percentage": 81.17, "elapsed_time": "1:15:14", "remaining_time": "0:17:27", "throughput": 1318.85, "total_tokens": 5953904}
6264
+ {"current_steps": 31240, "total_steps": 38480, "loss": 0.1144, "lr": 5.202116527990566e-06, "epoch": 16.237006237006238, "percentage": 81.19, "elapsed_time": "1:15:15", "remaining_time": "0:17:26", "throughput": 1318.84, "total_tokens": 5954768}
6265
+ {"current_steps": 31245, "total_steps": 38480, "loss": 0.2889, "lr": 5.1951945004093064e-06, "epoch": 16.239604989604988, "percentage": 81.2, "elapsed_time": "1:15:15", "remaining_time": "0:17:25", "throughput": 1318.84, "total_tokens": 5955728}
6266
+ {"current_steps": 31250, "total_steps": 38480, "loss": 0.0807, "lr": 5.188276547148485e-06, "epoch": 16.242203742203742, "percentage": 81.21, "elapsed_time": "1:15:16", "remaining_time": "0:17:24", "throughput": 1318.86, "total_tokens": 5956720}
6267
+ {"current_steps": 31255, "total_steps": 38480, "loss": 0.1202, "lr": 5.181362669631284e-06, "epoch": 16.244802494802496, "percentage": 81.22, "elapsed_time": "1:15:17", "remaining_time": "0:17:24", "throughput": 1318.87, "total_tokens": 5957680}
6268
+ {"current_steps": 31260, "total_steps": 38480, "loss": 0.082, "lr": 5.1744528692800666e-06, "epoch": 16.247401247401246, "percentage": 81.24, "elapsed_time": "1:15:17", "remaining_time": "0:17:23", "throughput": 1318.87, "total_tokens": 5958608}
6269
+ {"current_steps": 31265, "total_steps": 38480, "loss": 0.243, "lr": 5.167547147516335e-06, "epoch": 16.25, "percentage": 81.25, "elapsed_time": "1:15:18", "remaining_time": "0:17:22", "throughput": 1318.87, "total_tokens": 5959536}
6270
+ {"current_steps": 31270, "total_steps": 38480, "loss": 0.1524, "lr": 5.160645505760761e-06, "epoch": 16.252598752598754, "percentage": 81.26, "elapsed_time": "1:15:19", "remaining_time": "0:17:22", "throughput": 1318.88, "total_tokens": 5960528}
6271
+ {"current_steps": 31275, "total_steps": 38480, "loss": 0.1825, "lr": 5.1537479454331795e-06, "epoch": 16.255197505197504, "percentage": 81.28, "elapsed_time": "1:15:20", "remaining_time": "0:17:21", "throughput": 1318.9, "total_tokens": 5961520}
6272
+ {"current_steps": 31280, "total_steps": 38480, "loss": 0.0922, "lr": 5.146854467952581e-06, "epoch": 16.257796257796258, "percentage": 81.29, "elapsed_time": "1:15:20", "remaining_time": "0:17:20", "throughput": 1318.92, "total_tokens": 5962544}
6273
+ {"current_steps": 31285, "total_steps": 38480, "loss": 0.1056, "lr": 5.139965074737119e-06, "epoch": 16.260395010395012, "percentage": 81.3, "elapsed_time": "1:15:21", "remaining_time": "0:17:19", "throughput": 1318.92, "total_tokens": 5963440}
6274
+ {"current_steps": 31290, "total_steps": 38480, "loss": 0.1371, "lr": 5.133079767204097e-06, "epoch": 16.262993762993762, "percentage": 81.31, "elapsed_time": "1:15:22", "remaining_time": "0:17:19", "throughput": 1318.92, "total_tokens": 5964368}
6275
+ {"current_steps": 31295, "total_steps": 38480, "loss": 0.0887, "lr": 5.126198546770003e-06, "epoch": 16.265592515592516, "percentage": 81.33, "elapsed_time": "1:15:22", "remaining_time": "0:17:18", "throughput": 1318.93, "total_tokens": 5965328}
6276
+ {"current_steps": 31300, "total_steps": 38480, "loss": 0.1259, "lr": 5.119321414850459e-06, "epoch": 16.26819126819127, "percentage": 81.34, "elapsed_time": "1:15:23", "remaining_time": "0:17:17", "throughput": 1318.92, "total_tokens": 5966224}
6277
+ {"current_steps": 31305, "total_steps": 38480, "loss": 0.0524, "lr": 5.112448372860257e-06, "epoch": 16.27079002079002, "percentage": 81.35, "elapsed_time": "1:15:24", "remaining_time": "0:17:16", "throughput": 1318.93, "total_tokens": 5967184}
6278
+ {"current_steps": 31310, "total_steps": 38480, "loss": 0.0953, "lr": 5.105579422213335e-06, "epoch": 16.273388773388774, "percentage": 81.37, "elapsed_time": "1:15:24", "remaining_time": "0:17:16", "throughput": 1318.94, "total_tokens": 5968144}
6279
+ {"current_steps": 31315, "total_steps": 38480, "loss": 0.0511, "lr": 5.098714564322818e-06, "epoch": 16.275987525987524, "percentage": 81.38, "elapsed_time": "1:15:25", "remaining_time": "0:17:15", "throughput": 1318.95, "total_tokens": 5969136}
6280
+ {"current_steps": 31320, "total_steps": 38480, "loss": 0.0997, "lr": 5.091853800600963e-06, "epoch": 16.27858627858628, "percentage": 81.39, "elapsed_time": "1:15:26", "remaining_time": "0:17:14", "throughput": 1318.96, "total_tokens": 5970064}
6281
+ {"current_steps": 31325, "total_steps": 38480, "loss": 0.1263, "lr": 5.084997132459191e-06, "epoch": 16.281185031185032, "percentage": 81.41, "elapsed_time": "1:15:27", "remaining_time": "0:17:14", "throughput": 1318.96, "total_tokens": 5970992}
6282
+ {"current_steps": 31330, "total_steps": 38480, "loss": 0.2487, "lr": 5.078144561308077e-06, "epoch": 16.283783783783782, "percentage": 81.42, "elapsed_time": "1:15:27", "remaining_time": "0:17:13", "throughput": 1318.98, "total_tokens": 5972016}
6283
+ {"current_steps": 31335, "total_steps": 38480, "loss": 0.0583, "lr": 5.071296088557376e-06, "epoch": 16.286382536382536, "percentage": 81.43, "elapsed_time": "1:15:28", "remaining_time": "0:17:12", "throughput": 1318.98, "total_tokens": 5972912}
6284
+ {"current_steps": 31340, "total_steps": 38480, "loss": 0.1284, "lr": 5.064451715615961e-06, "epoch": 16.28898128898129, "percentage": 81.44, "elapsed_time": "1:15:29", "remaining_time": "0:17:11", "throughput": 1318.99, "total_tokens": 5973872}
6285
+ {"current_steps": 31345, "total_steps": 38480, "loss": 0.03, "lr": 5.057611443891888e-06, "epoch": 16.29158004158004, "percentage": 81.46, "elapsed_time": "1:15:29", "remaining_time": "0:17:11", "throughput": 1318.98, "total_tokens": 5974768}
6286
+ {"current_steps": 31350, "total_steps": 38480, "loss": 0.1013, "lr": 5.050775274792375e-06, "epoch": 16.294178794178794, "percentage": 81.47, "elapsed_time": "1:15:30", "remaining_time": "0:17:10", "throughput": 1318.99, "total_tokens": 5975728}
6287
+ {"current_steps": 31355, "total_steps": 38480, "loss": 0.1691, "lr": 5.043943209723776e-06, "epoch": 16.296777546777548, "percentage": 81.48, "elapsed_time": "1:15:31", "remaining_time": "0:17:09", "throughput": 1319.0, "total_tokens": 5976688}
6288
+ {"current_steps": 31360, "total_steps": 38480, "loss": 0.1165, "lr": 5.037115250091612e-06, "epoch": 16.2993762993763, "percentage": 81.5, "elapsed_time": "1:15:31", "remaining_time": "0:17:08", "throughput": 1319.01, "total_tokens": 5977680}
6289
+ {"current_steps": 31365, "total_steps": 38480, "loss": 0.1203, "lr": 5.030291397300546e-06, "epoch": 16.301975051975052, "percentage": 81.51, "elapsed_time": "1:15:32", "remaining_time": "0:17:08", "throughput": 1319.01, "total_tokens": 5978608}
6290
+ {"current_steps": 31370, "total_steps": 38480, "loss": 0.1996, "lr": 5.023471652754427e-06, "epoch": 16.304573804573806, "percentage": 81.52, "elapsed_time": "1:15:33", "remaining_time": "0:17:07", "throughput": 1319.01, "total_tokens": 5979504}
6291
+ {"current_steps": 31375, "total_steps": 38480, "loss": 0.1077, "lr": 5.016656017856228e-06, "epoch": 16.307172557172557, "percentage": 81.54, "elapsed_time": "1:15:34", "remaining_time": "0:17:06", "throughput": 1319.02, "total_tokens": 5980464}
6292
+ {"current_steps": 31380, "total_steps": 38480, "loss": 0.104, "lr": 5.009844494008084e-06, "epoch": 16.30977130977131, "percentage": 81.55, "elapsed_time": "1:15:34", "remaining_time": "0:17:06", "throughput": 1319.03, "total_tokens": 5981456}
6293
+ {"current_steps": 31385, "total_steps": 38480, "loss": 0.1912, "lr": 5.003037082611295e-06, "epoch": 16.31237006237006, "percentage": 81.56, "elapsed_time": "1:15:35", "remaining_time": "0:17:05", "throughput": 1319.03, "total_tokens": 5982384}
6294
+ {"current_steps": 31390, "total_steps": 38480, "loss": 0.0861, "lr": 4.99623378506631e-06, "epoch": 16.314968814968815, "percentage": 81.57, "elapsed_time": "1:15:36", "remaining_time": "0:17:04", "throughput": 1319.05, "total_tokens": 5983376}
6295
+ {"current_steps": 31395, "total_steps": 38480, "loss": 0.0807, "lr": 4.989434602772722e-06, "epoch": 16.31756756756757, "percentage": 81.59, "elapsed_time": "1:15:36", "remaining_time": "0:17:03", "throughput": 1319.06, "total_tokens": 5984336}
6296
+ {"current_steps": 31400, "total_steps": 38480, "loss": 0.073, "lr": 4.982639537129285e-06, "epoch": 16.32016632016632, "percentage": 81.6, "elapsed_time": "1:15:37", "remaining_time": "0:17:03", "throughput": 1319.05, "total_tokens": 5985232}
6297
+ {"current_steps": 31405, "total_steps": 38480, "loss": 0.1326, "lr": 4.975848589533913e-06, "epoch": 16.322765072765073, "percentage": 81.61, "elapsed_time": "1:15:38", "remaining_time": "0:17:02", "throughput": 1319.05, "total_tokens": 5986160}
6298
+ {"current_steps": 31410, "total_steps": 38480, "loss": 0.0695, "lr": 4.969061761383659e-06, "epoch": 16.325363825363826, "percentage": 81.63, "elapsed_time": "1:15:38", "remaining_time": "0:17:01", "throughput": 1319.06, "total_tokens": 5987120}
6299
+ {"current_steps": 31415, "total_steps": 38480, "loss": 0.0767, "lr": 4.962279054074728e-06, "epoch": 16.327962577962577, "percentage": 81.64, "elapsed_time": "1:15:39", "remaining_time": "0:17:00", "throughput": 1319.05, "total_tokens": 5988016}
6300
+ {"current_steps": 31420, "total_steps": 38480, "loss": 0.0661, "lr": 4.955500469002506e-06, "epoch": 16.33056133056133, "percentage": 81.65, "elapsed_time": "1:15:40", "remaining_time": "0:17:00", "throughput": 1319.06, "total_tokens": 5988976}
6301
+ {"current_steps": 31425, "total_steps": 38480, "loss": 0.2745, "lr": 4.948726007561494e-06, "epoch": 16.333160083160084, "percentage": 81.67, "elapsed_time": "1:15:41", "remaining_time": "0:16:59", "throughput": 1319.07, "total_tokens": 5989936}
6302
+ {"current_steps": 31430, "total_steps": 38480, "loss": 0.043, "lr": 4.941955671145363e-06, "epoch": 16.335758835758835, "percentage": 81.68, "elapsed_time": "1:15:41", "remaining_time": "0:16:58", "throughput": 1319.07, "total_tokens": 5990864}
6303
+ {"current_steps": 31435, "total_steps": 38480, "loss": 0.1623, "lr": 4.935189461146928e-06, "epoch": 16.33835758835759, "percentage": 81.69, "elapsed_time": "1:15:42", "remaining_time": "0:16:58", "throughput": 1319.07, "total_tokens": 5991792}
6304
+ {"current_steps": 31440, "total_steps": 38480, "loss": 0.0809, "lr": 4.92842737895817e-06, "epoch": 16.340956340956343, "percentage": 81.7, "elapsed_time": "1:15:43", "remaining_time": "0:16:57", "throughput": 1319.06, "total_tokens": 5992656}
6305
+ {"current_steps": 31445, "total_steps": 38480, "loss": 0.1109, "lr": 4.9216694259702044e-06, "epoch": 16.343555093555093, "percentage": 81.72, "elapsed_time": "1:15:43", "remaining_time": "0:16:56", "throughput": 1319.07, "total_tokens": 5993616}
6306
+ {"current_steps": 31450, "total_steps": 38480, "loss": 0.2286, "lr": 4.914915603573306e-06, "epoch": 16.346153846153847, "percentage": 81.73, "elapsed_time": "1:15:44", "remaining_time": "0:16:55", "throughput": 1319.09, "total_tokens": 5994608}
6307
+ {"current_steps": 31455, "total_steps": 38480, "loss": 0.1242, "lr": 4.908165913156887e-06, "epoch": 16.348752598752597, "percentage": 81.74, "elapsed_time": "1:15:45", "remaining_time": "0:16:55", "throughput": 1319.08, "total_tokens": 5995536}
6308
+ {"current_steps": 31460, "total_steps": 38480, "loss": 0.1089, "lr": 4.901420356109535e-06, "epoch": 16.35135135135135, "percentage": 81.76, "elapsed_time": "1:15:45", "remaining_time": "0:16:54", "throughput": 1319.08, "total_tokens": 5996464}
6309
+ {"current_steps": 31465, "total_steps": 38480, "loss": 0.0812, "lr": 4.8946789338189695e-06, "epoch": 16.353950103950105, "percentage": 81.77, "elapsed_time": "1:15:46", "remaining_time": "0:16:53", "throughput": 1319.09, "total_tokens": 5997424}
6310
+ {"current_steps": 31470, "total_steps": 38480, "loss": 0.0701, "lr": 4.8879416476720455e-06, "epoch": 16.356548856548855, "percentage": 81.78, "elapsed_time": "1:15:47", "remaining_time": "0:16:52", "throughput": 1319.09, "total_tokens": 5998352}
6311
+ {"current_steps": 31475, "total_steps": 38480, "loss": 0.1637, "lr": 4.881208499054798e-06, "epoch": 16.35914760914761, "percentage": 81.8, "elapsed_time": "1:15:48", "remaining_time": "0:16:52", "throughput": 1319.1, "total_tokens": 5999312}
6312
+ {"current_steps": 31480, "total_steps": 38480, "loss": 0.065, "lr": 4.874479489352396e-06, "epoch": 16.361746361746363, "percentage": 81.81, "elapsed_time": "1:15:48", "remaining_time": "0:16:51", "throughput": 1319.13, "total_tokens": 6000368}
6313
+ {"current_steps": 31485, "total_steps": 38480, "loss": 0.1433, "lr": 4.8677546199491496e-06, "epoch": 16.364345114345113, "percentage": 81.82, "elapsed_time": "1:15:49", "remaining_time": "0:16:50", "throughput": 1319.14, "total_tokens": 6001328}
6314
+ {"current_steps": 31490, "total_steps": 38480, "loss": 0.06, "lr": 4.8610338922285255e-06, "epoch": 16.366943866943867, "percentage": 81.83, "elapsed_time": "1:15:50", "remaining_time": "0:16:50", "throughput": 1319.15, "total_tokens": 6002320}
6315
+ {"current_steps": 31495, "total_steps": 38480, "loss": 0.1032, "lr": 4.8543173075731445e-06, "epoch": 16.36954261954262, "percentage": 81.85, "elapsed_time": "1:15:50", "remaining_time": "0:16:49", "throughput": 1319.14, "total_tokens": 6003216}
6316
+ {"current_steps": 31500, "total_steps": 38480, "loss": 0.1837, "lr": 4.847604867364766e-06, "epoch": 16.37214137214137, "percentage": 81.86, "elapsed_time": "1:15:51", "remaining_time": "0:16:48", "throughput": 1319.15, "total_tokens": 6004176}
6317
+ {"current_steps": 31505, "total_steps": 38480, "loss": 0.1169, "lr": 4.840896572984291e-06, "epoch": 16.374740124740125, "percentage": 81.87, "elapsed_time": "1:15:52", "remaining_time": "0:16:47", "throughput": 1319.15, "total_tokens": 6005104}
6318
+ {"current_steps": 31510, "total_steps": 38480, "loss": 0.1197, "lr": 4.83419242581179e-06, "epoch": 16.37733887733888, "percentage": 81.89, "elapsed_time": "1:15:52", "remaining_time": "0:16:47", "throughput": 1319.16, "total_tokens": 6006032}
6319
+ {"current_steps": 31515, "total_steps": 38480, "loss": 0.0823, "lr": 4.827492427226454e-06, "epoch": 16.37993762993763, "percentage": 81.9, "elapsed_time": "1:15:53", "remaining_time": "0:16:46", "throughput": 1319.16, "total_tokens": 6006992}
6320
+ {"current_steps": 31520, "total_steps": 38480, "loss": 0.1667, "lr": 4.820796578606637e-06, "epoch": 16.382536382536383, "percentage": 81.91, "elapsed_time": "1:15:54", "remaining_time": "0:16:45", "throughput": 1319.18, "total_tokens": 6007984}
6321
+ {"current_steps": 31525, "total_steps": 38480, "loss": 0.1222, "lr": 4.814104881329828e-06, "epoch": 16.385135135135137, "percentage": 81.93, "elapsed_time": "1:15:55", "remaining_time": "0:16:44", "throughput": 1319.18, "total_tokens": 6008912}
6322
+ {"current_steps": 31530, "total_steps": 38480, "loss": 0.0717, "lr": 4.807417336772687e-06, "epoch": 16.387733887733887, "percentage": 81.94, "elapsed_time": "1:15:55", "remaining_time": "0:16:44", "throughput": 1319.18, "total_tokens": 6009840}
6323
+ {"current_steps": 31535, "total_steps": 38480, "loss": 0.158, "lr": 4.800733946310981e-06, "epoch": 16.39033264033264, "percentage": 81.95, "elapsed_time": "1:15:56", "remaining_time": "0:16:43", "throughput": 1319.19, "total_tokens": 6010800}
6324
+ {"current_steps": 31540, "total_steps": 38480, "loss": 0.1936, "lr": 4.794054711319646e-06, "epoch": 16.39293139293139, "percentage": 81.96, "elapsed_time": "1:15:57", "remaining_time": "0:16:42", "throughput": 1319.2, "total_tokens": 6011792}
6325
+ {"current_steps": 31545, "total_steps": 38480, "loss": 0.1654, "lr": 4.787379633172767e-06, "epoch": 16.395530145530145, "percentage": 81.98, "elapsed_time": "1:15:57", "remaining_time": "0:16:42", "throughput": 1319.2, "total_tokens": 6012688}
6326
+ {"current_steps": 31550, "total_steps": 38480, "loss": 0.1103, "lr": 4.780708713243565e-06, "epoch": 16.3981288981289, "percentage": 81.99, "elapsed_time": "1:15:58", "remaining_time": "0:16:41", "throughput": 1319.21, "total_tokens": 6013680}
6327
+ {"current_steps": 31555, "total_steps": 38480, "loss": 0.1493, "lr": 4.774041952904407e-06, "epoch": 16.40072765072765, "percentage": 82.0, "elapsed_time": "1:15:59", "remaining_time": "0:16:40", "throughput": 1319.23, "total_tokens": 6014672}
6328
+ {"current_steps": 31560, "total_steps": 38480, "loss": 0.0533, "lr": 4.767379353526796e-06, "epoch": 16.403326403326403, "percentage": 82.02, "elapsed_time": "1:15:59", "remaining_time": "0:16:39", "throughput": 1319.24, "total_tokens": 6015632}
6329
+ {"current_steps": 31565, "total_steps": 38480, "loss": 0.0854, "lr": 4.760720916481401e-06, "epoch": 16.405925155925157, "percentage": 82.03, "elapsed_time": "1:16:00", "remaining_time": "0:16:39", "throughput": 1319.25, "total_tokens": 6016592}
6330
+ {"current_steps": 31570, "total_steps": 38480, "loss": 0.0775, "lr": 4.7540666431380166e-06, "epoch": 16.408523908523907, "percentage": 82.04, "elapsed_time": "1:16:01", "remaining_time": "0:16:38", "throughput": 1319.25, "total_tokens": 6017520}
6331
+ {"current_steps": 31575, "total_steps": 38480, "loss": 0.0846, "lr": 4.747416534865581e-06, "epoch": 16.41112266112266, "percentage": 82.06, "elapsed_time": "1:16:02", "remaining_time": "0:16:37", "throughput": 1319.24, "total_tokens": 6018416}
6332
+ {"current_steps": 31580, "total_steps": 38480, "loss": 0.1156, "lr": 4.740770593032176e-06, "epoch": 16.413721413721415, "percentage": 82.07, "elapsed_time": "1:16:02", "remaining_time": "0:16:36", "throughput": 1319.25, "total_tokens": 6019376}
6333
+ {"current_steps": 31585, "total_steps": 38480, "loss": 0.1764, "lr": 4.7341288190050455e-06, "epoch": 16.416320166320165, "percentage": 82.08, "elapsed_time": "1:16:03", "remaining_time": "0:16:36", "throughput": 1319.26, "total_tokens": 6020368}
6334
+ {"current_steps": 31590, "total_steps": 38480, "loss": 0.1609, "lr": 4.7274912141505505e-06, "epoch": 16.41891891891892, "percentage": 82.09, "elapsed_time": "1:16:04", "remaining_time": "0:16:35", "throughput": 1319.29, "total_tokens": 6021424}
6335
+ {"current_steps": 31595, "total_steps": 38480, "loss": 0.137, "lr": 4.7208577798342066e-06, "epoch": 16.421517671517673, "percentage": 82.11, "elapsed_time": "1:16:04", "remaining_time": "0:16:34", "throughput": 1319.29, "total_tokens": 6022352}
6336
+ {"current_steps": 31600, "total_steps": 38480, "loss": 0.078, "lr": 4.7142285174206684e-06, "epoch": 16.424116424116423, "percentage": 82.12, "elapsed_time": "1:16:05", "remaining_time": "0:16:34", "throughput": 1319.3, "total_tokens": 6023312}
6337
+ {"current_steps": 31605, "total_steps": 38480, "loss": 0.1806, "lr": 4.707603428273735e-06, "epoch": 16.426715176715177, "percentage": 82.13, "elapsed_time": "1:16:06", "remaining_time": "0:16:33", "throughput": 1319.3, "total_tokens": 6024240}
6338
+ {"current_steps": 31610, "total_steps": 38480, "loss": 0.1189, "lr": 4.700982513756341e-06, "epoch": 16.429313929313928, "percentage": 82.15, "elapsed_time": "1:16:06", "remaining_time": "0:16:32", "throughput": 1319.32, "total_tokens": 6025232}
6339
+ {"current_steps": 31615, "total_steps": 38480, "loss": 0.0466, "lr": 4.694365775230566e-06, "epoch": 16.43191268191268, "percentage": 82.16, "elapsed_time": "1:16:07", "remaining_time": "0:16:31", "throughput": 1319.33, "total_tokens": 6026192}
6340
+ {"current_steps": 31620, "total_steps": 38480, "loss": 0.1556, "lr": 4.687753214057638e-06, "epoch": 16.434511434511435, "percentage": 82.17, "elapsed_time": "1:16:08", "remaining_time": "0:16:31", "throughput": 1319.33, "total_tokens": 6027152}
6341
+ {"current_steps": 31625, "total_steps": 38480, "loss": 0.0657, "lr": 4.681144831597914e-06, "epoch": 16.437110187110186, "percentage": 82.19, "elapsed_time": "1:16:09", "remaining_time": "0:16:30", "throughput": 1319.34, "total_tokens": 6028080}
6342
+ {"current_steps": 31630, "total_steps": 38480, "loss": 0.1547, "lr": 4.674540629210888e-06, "epoch": 16.43970893970894, "percentage": 82.2, "elapsed_time": "1:16:09", "remaining_time": "0:16:29", "throughput": 1319.35, "total_tokens": 6029040}
6343
+ {"current_steps": 31635, "total_steps": 38480, "loss": 0.2409, "lr": 4.667940608255219e-06, "epoch": 16.442307692307693, "percentage": 82.21, "elapsed_time": "1:16:10", "remaining_time": "0:16:28", "throughput": 1319.36, "total_tokens": 6030032}
6344
+ {"current_steps": 31640, "total_steps": 38480, "loss": 0.1838, "lr": 4.661344770088677e-06, "epoch": 16.444906444906444, "percentage": 82.22, "elapsed_time": "1:16:11", "remaining_time": "0:16:28", "throughput": 1319.35, "total_tokens": 6030896}
6345
+ {"current_steps": 31645, "total_steps": 38480, "loss": 0.1299, "lr": 4.654753116068186e-06, "epoch": 16.447505197505198, "percentage": 82.24, "elapsed_time": "1:16:11", "remaining_time": "0:16:27", "throughput": 1319.38, "total_tokens": 6031952}
6346
+ {"current_steps": 31650, "total_steps": 38480, "loss": 0.1305, "lr": 4.648165647549802e-06, "epoch": 16.45010395010395, "percentage": 82.25, "elapsed_time": "1:16:12", "remaining_time": "0:16:26", "throughput": 1319.4, "total_tokens": 6032944}
6347
+ {"current_steps": 31655, "total_steps": 38480, "loss": 0.085, "lr": 4.64158236588873e-06, "epoch": 16.4527027027027, "percentage": 82.26, "elapsed_time": "1:16:13", "remaining_time": "0:16:26", "throughput": 1319.4, "total_tokens": 6033904}
6348
+ {"current_steps": 31660, "total_steps": 38480, "loss": 0.1249, "lr": 4.635003272439309e-06, "epoch": 16.455301455301456, "percentage": 82.28, "elapsed_time": "1:16:13", "remaining_time": "0:16:25", "throughput": 1319.39, "total_tokens": 6034768}
6349
+ {"current_steps": 31665, "total_steps": 38480, "loss": 0.1337, "lr": 4.62842836855501e-06, "epoch": 16.45790020790021, "percentage": 82.29, "elapsed_time": "1:16:14", "remaining_time": "0:16:24", "throughput": 1319.39, "total_tokens": 6035696}
6350
+ {"current_steps": 31670, "total_steps": 38480, "loss": 0.1169, "lr": 4.621857655588449e-06, "epoch": 16.46049896049896, "percentage": 82.3, "elapsed_time": "1:16:15", "remaining_time": "0:16:23", "throughput": 1319.4, "total_tokens": 6036656}
6351
+ {"current_steps": 31675, "total_steps": 38480, "loss": 0.062, "lr": 4.615291134891381e-06, "epoch": 16.463097713097714, "percentage": 82.32, "elapsed_time": "1:16:16", "remaining_time": "0:16:23", "throughput": 1319.4, "total_tokens": 6037584}
6352
+ {"current_steps": 31680, "total_steps": 38480, "loss": 0.0782, "lr": 4.608728807814694e-06, "epoch": 16.465696465696467, "percentage": 82.33, "elapsed_time": "1:16:16", "remaining_time": "0:16:22", "throughput": 1319.42, "total_tokens": 6038576}
6353
+ {"current_steps": 31685, "total_steps": 38480, "loss": 0.0864, "lr": 4.602170675708406e-06, "epoch": 16.468295218295218, "percentage": 82.34, "elapsed_time": "1:16:17", "remaining_time": "0:16:21", "throughput": 1319.41, "total_tokens": 6039504}
6354
+ {"current_steps": 31690, "total_steps": 38480, "loss": 0.0935, "lr": 4.595616739921696e-06, "epoch": 16.47089397089397, "percentage": 82.35, "elapsed_time": "1:16:18", "remaining_time": "0:16:20", "throughput": 1319.42, "total_tokens": 6040464}
6355
+ {"current_steps": 31695, "total_steps": 38480, "loss": 0.1212, "lr": 4.58906700180286e-06, "epoch": 16.473492723492722, "percentage": 82.37, "elapsed_time": "1:16:18", "remaining_time": "0:16:20", "throughput": 1319.42, "total_tokens": 6041360}
6356
+ {"current_steps": 31700, "total_steps": 38480, "loss": 0.3476, "lr": 4.582521462699332e-06, "epoch": 16.476091476091476, "percentage": 82.38, "elapsed_time": "1:16:19", "remaining_time": "0:16:19", "throughput": 1319.42, "total_tokens": 6042320}
6357
+ {"current_steps": 31705, "total_steps": 38480, "loss": 0.1064, "lr": 4.5759801239576824e-06, "epoch": 16.47869022869023, "percentage": 82.39, "elapsed_time": "1:16:20", "remaining_time": "0:16:18", "throughput": 1319.44, "total_tokens": 6043312}
6358
+ {"current_steps": 31710, "total_steps": 38480, "loss": 0.1221, "lr": 4.569442986923631e-06, "epoch": 16.48128898128898, "percentage": 82.41, "elapsed_time": "1:16:20", "remaining_time": "0:16:18", "throughput": 1319.45, "total_tokens": 6044272}
6359
+ {"current_steps": 31715, "total_steps": 38480, "loss": 0.1467, "lr": 4.562910052942015e-06, "epoch": 16.483887733887734, "percentage": 82.42, "elapsed_time": "1:16:21", "remaining_time": "0:16:17", "throughput": 1319.45, "total_tokens": 6045200}
6360
+ {"current_steps": 31720, "total_steps": 38480, "loss": 0.1059, "lr": 4.5563813233568205e-06, "epoch": 16.486486486486488, "percentage": 82.43, "elapsed_time": "1:16:22", "remaining_time": "0:16:16", "throughput": 1319.47, "total_tokens": 6046224}
6361
+ {"current_steps": 31725, "total_steps": 38480, "loss": 0.0867, "lr": 4.549856799511149e-06, "epoch": 16.489085239085238, "percentage": 82.45, "elapsed_time": "1:16:23", "remaining_time": "0:16:15", "throughput": 1319.48, "total_tokens": 6047184}
6362
+ {"current_steps": 31730, "total_steps": 38480, "loss": 0.0928, "lr": 4.543336482747274e-06, "epoch": 16.491683991683992, "percentage": 82.46, "elapsed_time": "1:16:23", "remaining_time": "0:16:15", "throughput": 1319.48, "total_tokens": 6048112}
6363
+ {"current_steps": 31735, "total_steps": 38480, "loss": 0.1217, "lr": 4.536820374406559e-06, "epoch": 16.494282744282746, "percentage": 82.47, "elapsed_time": "1:16:24", "remaining_time": "0:16:14", "throughput": 1319.47, "total_tokens": 6048976}
6364
+ {"current_steps": 31740, "total_steps": 38480, "loss": 0.198, "lr": 4.530308475829523e-06, "epoch": 16.496881496881496, "percentage": 82.48, "elapsed_time": "1:16:25", "remaining_time": "0:16:13", "throughput": 1319.47, "total_tokens": 6049904}
6365
+ {"current_steps": 31745, "total_steps": 38480, "loss": 0.0619, "lr": 4.523800788355834e-06, "epoch": 16.49948024948025, "percentage": 82.5, "elapsed_time": "1:16:25", "remaining_time": "0:16:12", "throughput": 1319.46, "total_tokens": 6050800}
6366
+ {"current_steps": 31750, "total_steps": 38480, "loss": 0.0875, "lr": 4.517297313324268e-06, "epoch": 16.502079002079, "percentage": 82.51, "elapsed_time": "1:16:26", "remaining_time": "0:16:12", "throughput": 1319.49, "total_tokens": 6051856}
6367
+ {"current_steps": 31755, "total_steps": 38480, "loss": 0.1222, "lr": 4.51079805207274e-06, "epoch": 16.504677754677754, "percentage": 82.52, "elapsed_time": "1:16:27", "remaining_time": "0:16:11", "throughput": 1319.48, "total_tokens": 6052752}
6368
+ {"current_steps": 31760, "total_steps": 38480, "loss": 0.2562, "lr": 4.504303005938318e-06, "epoch": 16.507276507276508, "percentage": 82.54, "elapsed_time": "1:16:27", "remaining_time": "0:16:10", "throughput": 1319.47, "total_tokens": 6053616}
6369
+ {"current_steps": 31765, "total_steps": 38480, "loss": 0.2017, "lr": 4.497812176257179e-06, "epoch": 16.50987525987526, "percentage": 82.55, "elapsed_time": "1:16:28", "remaining_time": "0:16:10", "throughput": 1319.48, "total_tokens": 6054576}
6370
+ {"current_steps": 31770, "total_steps": 38480, "loss": 0.1834, "lr": 4.491325564364643e-06, "epoch": 16.512474012474012, "percentage": 82.56, "elapsed_time": "1:16:29", "remaining_time": "0:16:09", "throughput": 1319.48, "total_tokens": 6055504}
6371
+ {"current_steps": 31775, "total_steps": 38480, "loss": 0.1314, "lr": 4.4848431715951514e-06, "epoch": 16.515072765072766, "percentage": 82.58, "elapsed_time": "1:16:30", "remaining_time": "0:16:08", "throughput": 1319.49, "total_tokens": 6056464}
6372
+ {"current_steps": 31780, "total_steps": 38480, "loss": 0.0774, "lr": 4.4783649992823e-06, "epoch": 16.517671517671516, "percentage": 82.59, "elapsed_time": "1:16:30", "remaining_time": "0:16:07", "throughput": 1319.53, "total_tokens": 6057552}
6373
+ {"current_steps": 31785, "total_steps": 38480, "loss": 0.0745, "lr": 4.471891048758803e-06, "epoch": 16.52027027027027, "percentage": 82.6, "elapsed_time": "1:16:31", "remaining_time": "0:16:07", "throughput": 1319.52, "total_tokens": 6058448}
6374
+ {"current_steps": 31790, "total_steps": 38480, "loss": 0.1743, "lr": 4.465421321356497e-06, "epoch": 16.522869022869024, "percentage": 82.61, "elapsed_time": "1:16:32", "remaining_time": "0:16:06", "throughput": 1319.53, "total_tokens": 6059408}
6375
+ {"current_steps": 31795, "total_steps": 38480, "loss": 0.0792, "lr": 4.458955818406365e-06, "epoch": 16.525467775467774, "percentage": 82.63, "elapsed_time": "1:16:32", "remaining_time": "0:16:05", "throughput": 1319.53, "total_tokens": 6060336}
6376
+ {"current_steps": 31800, "total_steps": 38480, "loss": 0.0368, "lr": 4.452494541238514e-06, "epoch": 16.528066528066528, "percentage": 82.64, "elapsed_time": "1:16:33", "remaining_time": "0:16:04", "throughput": 1319.55, "total_tokens": 6061328}
6377
+ {"current_steps": 31805, "total_steps": 38480, "loss": 0.0573, "lr": 4.4460374911821816e-06, "epoch": 16.530665280665282, "percentage": 82.65, "elapsed_time": "1:16:34", "remaining_time": "0:16:04", "throughput": 1319.56, "total_tokens": 6062288}
6378
+ {"current_steps": 31810, "total_steps": 38480, "loss": 0.0611, "lr": 4.4395846695657325e-06, "epoch": 16.533264033264032, "percentage": 82.67, "elapsed_time": "1:16:34", "remaining_time": "0:16:03", "throughput": 1319.56, "total_tokens": 6063216}
6379
+ {"current_steps": 31815, "total_steps": 38480, "loss": 0.0837, "lr": 4.4331360777166765e-06, "epoch": 16.535862785862786, "percentage": 82.68, "elapsed_time": "1:16:35", "remaining_time": "0:16:02", "throughput": 1319.56, "total_tokens": 6064176}
6380
+ {"current_steps": 31820, "total_steps": 38480, "loss": 0.0207, "lr": 4.426691716961637e-06, "epoch": 16.53846153846154, "percentage": 82.69, "elapsed_time": "1:16:36", "remaining_time": "0:16:02", "throughput": 1319.57, "total_tokens": 6065104}
6381
+ {"current_steps": 31825, "total_steps": 38480, "loss": 0.1614, "lr": 4.420251588626373e-06, "epoch": 16.54106029106029, "percentage": 82.71, "elapsed_time": "1:16:36", "remaining_time": "0:16:01", "throughput": 1319.57, "total_tokens": 6066064}
6382
+ {"current_steps": 31830, "total_steps": 38480, "loss": 0.184, "lr": 4.413815694035766e-06, "epoch": 16.543659043659044, "percentage": 82.72, "elapsed_time": "1:16:37", "remaining_time": "0:16:00", "throughput": 1319.58, "total_tokens": 6066992}
6383
+ {"current_steps": 31835, "total_steps": 38480, "loss": 0.1876, "lr": 4.407384034513845e-06, "epoch": 16.546257796257795, "percentage": 82.73, "elapsed_time": "1:16:38", "remaining_time": "0:15:59", "throughput": 1319.58, "total_tokens": 6067920}
6384
+ {"current_steps": 31840, "total_steps": 38480, "loss": 0.1643, "lr": 4.400956611383747e-06, "epoch": 16.54885654885655, "percentage": 82.74, "elapsed_time": "1:16:39", "remaining_time": "0:15:59", "throughput": 1319.59, "total_tokens": 6068880}
6385
+ {"current_steps": 31845, "total_steps": 38480, "loss": 0.3654, "lr": 4.394533425967751e-06, "epoch": 16.551455301455302, "percentage": 82.76, "elapsed_time": "1:16:39", "remaining_time": "0:15:58", "throughput": 1319.6, "total_tokens": 6069872}
6386
+ {"current_steps": 31850, "total_steps": 38480, "loss": 0.2746, "lr": 4.388114479587252e-06, "epoch": 16.554054054054053, "percentage": 82.77, "elapsed_time": "1:16:40", "remaining_time": "0:15:57", "throughput": 1319.61, "total_tokens": 6070832}
6387
+ {"current_steps": 31855, "total_steps": 38480, "loss": 0.1382, "lr": 4.381699773562789e-06, "epoch": 16.556652806652806, "percentage": 82.78, "elapsed_time": "1:16:41", "remaining_time": "0:15:56", "throughput": 1319.6, "total_tokens": 6071728}
6388
+ {"current_steps": 31860, "total_steps": 38480, "loss": 0.1423, "lr": 4.375289309214023e-06, "epoch": 16.55925155925156, "percentage": 82.8, "elapsed_time": "1:16:41", "remaining_time": "0:15:56", "throughput": 1319.61, "total_tokens": 6072688}
6389
+ {"current_steps": 31865, "total_steps": 38480, "loss": 0.083, "lr": 4.368883087859721e-06, "epoch": 16.56185031185031, "percentage": 82.81, "elapsed_time": "1:16:42", "remaining_time": "0:15:55", "throughput": 1319.61, "total_tokens": 6073616}
6390
+ {"current_steps": 31870, "total_steps": 38480, "loss": 0.2707, "lr": 4.3624811108178125e-06, "epoch": 16.564449064449065, "percentage": 82.82, "elapsed_time": "1:16:43", "remaining_time": "0:15:54", "throughput": 1319.61, "total_tokens": 6074512}
6391
+ {"current_steps": 31875, "total_steps": 38480, "loss": 0.2503, "lr": 4.3560833794053295e-06, "epoch": 16.56704781704782, "percentage": 82.84, "elapsed_time": "1:16:43", "remaining_time": "0:15:54", "throughput": 1319.61, "total_tokens": 6075440}
6392
+ {"current_steps": 31880, "total_steps": 38480, "loss": 0.1268, "lr": 4.349689894938433e-06, "epoch": 16.56964656964657, "percentage": 82.85, "elapsed_time": "1:16:44", "remaining_time": "0:15:53", "throughput": 1319.62, "total_tokens": 6076400}
6393
+ {"current_steps": 31885, "total_steps": 38480, "loss": 0.1518, "lr": 4.34330065873243e-06, "epoch": 16.572245322245323, "percentage": 82.86, "elapsed_time": "1:16:45", "remaining_time": "0:15:52", "throughput": 1319.63, "total_tokens": 6077360}
6394
+ {"current_steps": 31890, "total_steps": 38480, "loss": 0.058, "lr": 4.336915672101727e-06, "epoch": 16.574844074844076, "percentage": 82.87, "elapsed_time": "1:16:46", "remaining_time": "0:15:51", "throughput": 1319.61, "total_tokens": 6078224}
6395
+ {"current_steps": 31895, "total_steps": 38480, "loss": 0.0988, "lr": 4.330534936359873e-06, "epoch": 16.577442827442827, "percentage": 82.89, "elapsed_time": "1:16:46", "remaining_time": "0:15:51", "throughput": 1319.62, "total_tokens": 6079184}
6396
+ {"current_steps": 31900, "total_steps": 38480, "loss": 0.087, "lr": 4.3241584528195295e-06, "epoch": 16.58004158004158, "percentage": 82.9, "elapsed_time": "1:16:47", "remaining_time": "0:15:50", "throughput": 1319.63, "total_tokens": 6080144}
6397
+ {"current_steps": 31905, "total_steps": 38480, "loss": 0.1108, "lr": 4.317786222792502e-06, "epoch": 16.58264033264033, "percentage": 82.91, "elapsed_time": "1:16:48", "remaining_time": "0:15:49", "throughput": 1319.63, "total_tokens": 6081072}
6398
+ {"current_steps": 31910, "total_steps": 38480, "loss": 0.0567, "lr": 4.311418247589705e-06, "epoch": 16.585239085239085, "percentage": 82.93, "elapsed_time": "1:16:48", "remaining_time": "0:15:48", "throughput": 1319.64, "total_tokens": 6082000}
6399
+ {"current_steps": 31915, "total_steps": 38480, "loss": 0.1743, "lr": 4.305054528521183e-06, "epoch": 16.58783783783784, "percentage": 82.94, "elapsed_time": "1:16:49", "remaining_time": "0:15:48", "throughput": 1319.65, "total_tokens": 6082992}
6400
+ {"current_steps": 31920, "total_steps": 38480, "loss": 0.0979, "lr": 4.2986950668960996e-06, "epoch": 16.59043659043659, "percentage": 82.95, "elapsed_time": "1:16:50", "remaining_time": "0:15:47", "throughput": 1319.65, "total_tokens": 6083888}
6401
+ {"current_steps": 31925, "total_steps": 38480, "loss": 0.1146, "lr": 4.2923398640227625e-06, "epoch": 16.593035343035343, "percentage": 82.97, "elapsed_time": "1:16:50", "remaining_time": "0:15:46", "throughput": 1319.64, "total_tokens": 6084784}
6402
+ {"current_steps": 31930, "total_steps": 38480, "loss": 0.1002, "lr": 4.285988921208573e-06, "epoch": 16.595634095634097, "percentage": 82.98, "elapsed_time": "1:16:51", "remaining_time": "0:15:46", "throughput": 1319.66, "total_tokens": 6085776}
6403
+ {"current_steps": 31935, "total_steps": 38480, "loss": 0.1523, "lr": 4.279642239760071e-06, "epoch": 16.598232848232847, "percentage": 82.99, "elapsed_time": "1:16:52", "remaining_time": "0:15:45", "throughput": 1319.66, "total_tokens": 6086704}
6404
+ {"current_steps": 31940, "total_steps": 38480, "loss": 0.1951, "lr": 4.273299820982932e-06, "epoch": 16.6008316008316, "percentage": 83.0, "elapsed_time": "1:16:53", "remaining_time": "0:15:44", "throughput": 1319.66, "total_tokens": 6087632}
6405
+ {"current_steps": 31945, "total_steps": 38480, "loss": 0.2305, "lr": 4.266961666181935e-06, "epoch": 16.603430353430355, "percentage": 83.02, "elapsed_time": "1:16:53", "remaining_time": "0:15:43", "throughput": 1319.66, "total_tokens": 6088560}
6406
+ {"current_steps": 31950, "total_steps": 38480, "loss": 0.0814, "lr": 4.260627776660992e-06, "epoch": 16.606029106029105, "percentage": 83.03, "elapsed_time": "1:16:54", "remaining_time": "0:15:43", "throughput": 1319.67, "total_tokens": 6089520}
6407
+ {"current_steps": 31955, "total_steps": 38480, "loss": 0.2083, "lr": 4.254298153723127e-06, "epoch": 16.60862785862786, "percentage": 83.04, "elapsed_time": "1:16:55", "remaining_time": "0:15:42", "throughput": 1319.69, "total_tokens": 6090512}
6408
+ {"current_steps": 31960, "total_steps": 38480, "loss": 0.2255, "lr": 4.24797279867051e-06, "epoch": 16.611226611226613, "percentage": 83.06, "elapsed_time": "1:16:55", "remaining_time": "0:15:41", "throughput": 1319.69, "total_tokens": 6091440}
6409
+ {"current_steps": 31965, "total_steps": 38480, "loss": 0.1493, "lr": 4.241651712804407e-06, "epoch": 16.613825363825363, "percentage": 83.07, "elapsed_time": "1:16:56", "remaining_time": "0:15:40", "throughput": 1319.73, "total_tokens": 6092528}
6410
+ {"current_steps": 31970, "total_steps": 38480, "loss": 0.0874, "lr": 4.235334897425216e-06, "epoch": 16.616424116424117, "percentage": 83.08, "elapsed_time": "1:16:57", "remaining_time": "0:15:40", "throughput": 1319.73, "total_tokens": 6093488}
6411
+ {"current_steps": 31975, "total_steps": 38480, "loss": 0.1387, "lr": 4.229022353832454e-06, "epoch": 16.61902286902287, "percentage": 83.1, "elapsed_time": "1:16:57", "remaining_time": "0:15:39", "throughput": 1319.76, "total_tokens": 6094544}
6412
+ {"current_steps": 31980, "total_steps": 38480, "loss": 0.1584, "lr": 4.222714083324769e-06, "epoch": 16.62162162162162, "percentage": 83.11, "elapsed_time": "1:16:58", "remaining_time": "0:15:38", "throughput": 1319.76, "total_tokens": 6095440}
6413
+ {"current_steps": 31985, "total_steps": 38480, "loss": 0.053, "lr": 4.216410087199921e-06, "epoch": 16.624220374220375, "percentage": 83.12, "elapsed_time": "1:16:59", "remaining_time": "0:15:38", "throughput": 1319.77, "total_tokens": 6096400}
6414
+ {"current_steps": 31990, "total_steps": 38480, "loss": 0.1342, "lr": 4.2101103667547905e-06, "epoch": 16.626819126819125, "percentage": 83.13, "elapsed_time": "1:17:00", "remaining_time": "0:15:37", "throughput": 1319.78, "total_tokens": 6097392}
6415
+ {"current_steps": 31995, "total_steps": 38480, "loss": 0.1785, "lr": 4.203814923285379e-06, "epoch": 16.62941787941788, "percentage": 83.15, "elapsed_time": "1:17:00", "remaining_time": "0:15:36", "throughput": 1319.79, "total_tokens": 6098352}
6416
+ {"current_steps": 32000, "total_steps": 38480, "loss": 0.236, "lr": 4.197523758086813e-06, "epoch": 16.632016632016633, "percentage": 83.16, "elapsed_time": "1:17:01", "remaining_time": "0:15:35", "throughput": 1319.81, "total_tokens": 6099376}
6417
+ {"current_steps": 32005, "total_steps": 38480, "loss": 0.146, "lr": 4.1912368724533216e-06, "epoch": 16.634615384615383, "percentage": 83.17, "elapsed_time": "1:17:02", "remaining_time": "0:15:35", "throughput": 1319.81, "total_tokens": 6100304}
6418
+ {"current_steps": 32010, "total_steps": 38480, "loss": 0.2173, "lr": 4.184954267678287e-06, "epoch": 16.637214137214137, "percentage": 83.19, "elapsed_time": "1:17:02", "remaining_time": "0:15:34", "throughput": 1319.81, "total_tokens": 6101200}
6419
+ {"current_steps": 32015, "total_steps": 38480, "loss": 0.1421, "lr": 4.178675945054181e-06, "epoch": 16.63981288981289, "percentage": 83.2, "elapsed_time": "1:17:03", "remaining_time": "0:15:33", "throughput": 1319.8, "total_tokens": 6102064}
6420
+ {"current_steps": 32020, "total_steps": 38480, "loss": 0.244, "lr": 4.172401905872605e-06, "epoch": 16.64241164241164, "percentage": 83.21, "elapsed_time": "1:17:04", "remaining_time": "0:15:32", "throughput": 1319.81, "total_tokens": 6103056}
6421
+ {"current_steps": 32025, "total_steps": 38480, "loss": 0.1499, "lr": 4.16613215142427e-06, "epoch": 16.645010395010395, "percentage": 83.23, "elapsed_time": "1:17:04", "remaining_time": "0:15:32", "throughput": 1319.82, "total_tokens": 6104016}
6422
+ {"current_steps": 32030, "total_steps": 38480, "loss": 0.1585, "lr": 4.15986668299903e-06, "epoch": 16.64760914760915, "percentage": 83.24, "elapsed_time": "1:17:05", "remaining_time": "0:15:31", "throughput": 1319.83, "total_tokens": 6104976}
6423
+ {"current_steps": 32035, "total_steps": 38480, "loss": 0.2672, "lr": 4.153605501885835e-06, "epoch": 16.6502079002079, "percentage": 83.25, "elapsed_time": "1:17:06", "remaining_time": "0:15:30", "throughput": 1319.83, "total_tokens": 6105936}
6424
+ {"current_steps": 32040, "total_steps": 38480, "loss": 0.123, "lr": 4.1473486093727535e-06, "epoch": 16.652806652806653, "percentage": 83.26, "elapsed_time": "1:17:06", "remaining_time": "0:15:30", "throughput": 1319.84, "total_tokens": 6106896}
6425
+ {"current_steps": 32045, "total_steps": 38480, "loss": 0.1792, "lr": 4.141096006746975e-06, "epoch": 16.655405405405407, "percentage": 83.28, "elapsed_time": "1:17:07", "remaining_time": "0:15:29", "throughput": 1319.85, "total_tokens": 6107856}
6426
+ {"current_steps": 32050, "total_steps": 38480, "loss": 0.1873, "lr": 4.134847695294825e-06, "epoch": 16.658004158004157, "percentage": 83.29, "elapsed_time": "1:17:08", "remaining_time": "0:15:28", "throughput": 1319.85, "total_tokens": 6108784}
6427
+ {"current_steps": 32055, "total_steps": 38480, "loss": 0.2046, "lr": 4.128603676301723e-06, "epoch": 16.66060291060291, "percentage": 83.3, "elapsed_time": "1:17:09", "remaining_time": "0:15:27", "throughput": 1319.87, "total_tokens": 6109776}
6428
+ {"current_steps": 32060, "total_steps": 38480, "loss": 0.1829, "lr": 4.122363951052197e-06, "epoch": 16.66320166320166, "percentage": 83.32, "elapsed_time": "1:17:09", "remaining_time": "0:15:27", "throughput": 1319.87, "total_tokens": 6110704}
6429
+ {"current_steps": 32065, "total_steps": 38480, "loss": 0.1382, "lr": 4.116128520829926e-06, "epoch": 16.665800415800415, "percentage": 83.33, "elapsed_time": "1:17:10", "remaining_time": "0:15:26", "throughput": 1319.88, "total_tokens": 6111664}
6430
+ {"current_steps": 32070, "total_steps": 38480, "loss": 0.1649, "lr": 4.10989738691768e-06, "epoch": 16.66839916839917, "percentage": 83.34, "elapsed_time": "1:17:11", "remaining_time": "0:15:25", "throughput": 1319.88, "total_tokens": 6112592}
6431
+ {"current_steps": 32075, "total_steps": 38480, "loss": 0.0982, "lr": 4.103670550597355e-06, "epoch": 16.67099792099792, "percentage": 83.35, "elapsed_time": "1:17:11", "remaining_time": "0:15:24", "throughput": 1319.89, "total_tokens": 6113584}
6432
+ {"current_steps": 32080, "total_steps": 38480, "loss": 0.2221, "lr": 4.097448013149949e-06, "epoch": 16.673596673596673, "percentage": 83.37, "elapsed_time": "1:17:12", "remaining_time": "0:15:24", "throughput": 1319.91, "total_tokens": 6114576}
6433
+ {"current_steps": 32085, "total_steps": 38480, "loss": 0.1037, "lr": 4.091229775855598e-06, "epoch": 16.676195426195427, "percentage": 83.38, "elapsed_time": "1:17:13", "remaining_time": "0:15:23", "throughput": 1319.91, "total_tokens": 6115504}
6434
+ {"current_steps": 32090, "total_steps": 38480, "loss": 0.2041, "lr": 4.085015839993539e-06, "epoch": 16.678794178794178, "percentage": 83.39, "elapsed_time": "1:17:13", "remaining_time": "0:15:22", "throughput": 1319.94, "total_tokens": 6116560}
6435
+ {"current_steps": 32095, "total_steps": 38480, "loss": 0.0353, "lr": 4.078806206842126e-06, "epoch": 16.68139293139293, "percentage": 83.41, "elapsed_time": "1:17:14", "remaining_time": "0:15:22", "throughput": 1319.92, "total_tokens": 6117424}
6436
+ {"current_steps": 32100, "total_steps": 38480, "loss": 0.1153, "lr": 4.07260087767882e-06, "epoch": 16.683991683991685, "percentage": 83.42, "elapsed_time": "1:17:15", "remaining_time": "0:15:21", "throughput": 1319.92, "total_tokens": 6118352}
6437
+ {"current_steps": 32105, "total_steps": 38480, "loss": 0.2455, "lr": 4.06639985378022e-06, "epoch": 16.686590436590436, "percentage": 83.43, "elapsed_time": "1:17:16", "remaining_time": "0:15:20", "throughput": 1319.95, "total_tokens": 6119376}
6438
+ {"current_steps": 32110, "total_steps": 38480, "loss": 0.1138, "lr": 4.0602031364220164e-06, "epoch": 16.68918918918919, "percentage": 83.45, "elapsed_time": "1:17:16", "remaining_time": "0:15:19", "throughput": 1319.95, "total_tokens": 6120336}
6439
+ {"current_steps": 32115, "total_steps": 38480, "loss": 0.1482, "lr": 4.054010726879015e-06, "epoch": 16.691787941787943, "percentage": 83.46, "elapsed_time": "1:17:17", "remaining_time": "0:15:19", "throughput": 1319.96, "total_tokens": 6121296}
6440
+ {"current_steps": 32120, "total_steps": 38480, "loss": 0.1401, "lr": 4.047822626425163e-06, "epoch": 16.694386694386694, "percentage": 83.47, "elapsed_time": "1:17:18", "remaining_time": "0:15:18", "throughput": 1319.96, "total_tokens": 6122224}
6441
+ {"current_steps": 32125, "total_steps": 38480, "loss": 0.0716, "lr": 4.0416388363334805e-06, "epoch": 16.696985446985448, "percentage": 83.48, "elapsed_time": "1:17:18", "remaining_time": "0:15:17", "throughput": 1319.95, "total_tokens": 6123120}
6442
+ {"current_steps": 32130, "total_steps": 38480, "loss": 0.1979, "lr": 4.0354593578761205e-06, "epoch": 16.6995841995842, "percentage": 83.5, "elapsed_time": "1:17:19", "remaining_time": "0:15:16", "throughput": 1319.97, "total_tokens": 6124112}
6443
+ {"current_steps": 32135, "total_steps": 38480, "loss": 0.144, "lr": 4.029284192324362e-06, "epoch": 16.70218295218295, "percentage": 83.51, "elapsed_time": "1:17:20", "remaining_time": "0:15:16", "throughput": 1319.97, "total_tokens": 6125040}
6444
+ {"current_steps": 32140, "total_steps": 38480, "loss": 0.1684, "lr": 4.0231133409485754e-06, "epoch": 16.704781704781706, "percentage": 83.52, "elapsed_time": "1:17:20", "remaining_time": "0:15:15", "throughput": 1319.98, "total_tokens": 6126032}
6445
+ {"current_steps": 32145, "total_steps": 38480, "loss": 0.1673, "lr": 4.016946805018254e-06, "epoch": 16.707380457380456, "percentage": 83.54, "elapsed_time": "1:17:21", "remaining_time": "0:15:14", "throughput": 1319.98, "total_tokens": 6126960}
6446
+ {"current_steps": 32150, "total_steps": 38480, "loss": 0.1056, "lr": 4.010784585801994e-06, "epoch": 16.70997920997921, "percentage": 83.55, "elapsed_time": "1:17:22", "remaining_time": "0:15:14", "throughput": 1320.0, "total_tokens": 6127952}
6447
+ {"current_steps": 32155, "total_steps": 38480, "loss": 0.1211, "lr": 4.004626684567523e-06, "epoch": 16.712577962577964, "percentage": 83.56, "elapsed_time": "1:17:23", "remaining_time": "0:15:13", "throughput": 1320.0, "total_tokens": 6128880}
6448
+ {"current_steps": 32160, "total_steps": 38480, "loss": 0.0903, "lr": 3.99847310258166e-06, "epoch": 16.715176715176714, "percentage": 83.58, "elapsed_time": "1:17:23", "remaining_time": "0:15:12", "throughput": 1320.01, "total_tokens": 6129872}
6449
+ {"current_steps": 32165, "total_steps": 38480, "loss": 0.1283, "lr": 3.992323841110348e-06, "epoch": 16.717775467775468, "percentage": 83.59, "elapsed_time": "1:17:24", "remaining_time": "0:15:11", "throughput": 1320.03, "total_tokens": 6130864}
6450
+ {"current_steps": 32170, "total_steps": 38480, "loss": 0.1638, "lr": 3.986178901418627e-06, "epoch": 16.72037422037422, "percentage": 83.6, "elapsed_time": "1:17:25", "remaining_time": "0:15:11", "throughput": 1320.04, "total_tokens": 6131824}
6451
+ {"current_steps": 32175, "total_steps": 38480, "loss": 0.1285, "lr": 3.980038284770671e-06, "epoch": 16.722972972972972, "percentage": 83.61, "elapsed_time": "1:17:25", "remaining_time": "0:15:10", "throughput": 1320.04, "total_tokens": 6132752}
6452
+ {"current_steps": 32180, "total_steps": 38480, "loss": 0.2188, "lr": 3.973901992429746e-06, "epoch": 16.725571725571726, "percentage": 83.63, "elapsed_time": "1:17:26", "remaining_time": "0:15:09", "throughput": 1320.04, "total_tokens": 6133680}
6453
+ {"current_steps": 32185, "total_steps": 38480, "loss": 0.1413, "lr": 3.967770025658232e-06, "epoch": 16.72817047817048, "percentage": 83.64, "elapsed_time": "1:17:27", "remaining_time": "0:15:08", "throughput": 1320.07, "total_tokens": 6134736}
6454
+ {"current_steps": 32190, "total_steps": 38480, "loss": 0.0989, "lr": 3.96164238571762e-06, "epoch": 16.73076923076923, "percentage": 83.65, "elapsed_time": "1:17:27", "remaining_time": "0:15:08", "throughput": 1320.06, "total_tokens": 6135632}
6455
+ {"current_steps": 32195, "total_steps": 38480, "loss": 0.2616, "lr": 3.955519073868513e-06, "epoch": 16.733367983367984, "percentage": 83.67, "elapsed_time": "1:17:28", "remaining_time": "0:15:07", "throughput": 1320.07, "total_tokens": 6136592}
6456
+ {"current_steps": 32200, "total_steps": 38480, "loss": 0.1448, "lr": 3.949400091370623e-06, "epoch": 16.735966735966738, "percentage": 83.68, "elapsed_time": "1:17:29", "remaining_time": "0:15:06", "throughput": 1320.08, "total_tokens": 6137552}
6457
+ {"current_steps": 32205, "total_steps": 38480, "loss": 0.0561, "lr": 3.943285439482763e-06, "epoch": 16.738565488565488, "percentage": 83.69, "elapsed_time": "1:17:30", "remaining_time": "0:15:06", "throughput": 1320.07, "total_tokens": 6138448}
6458
+ {"current_steps": 32210, "total_steps": 38480, "loss": 0.1871, "lr": 3.937175119462874e-06, "epoch": 16.741164241164242, "percentage": 83.71, "elapsed_time": "1:17:30", "remaining_time": "0:15:05", "throughput": 1320.09, "total_tokens": 6139472}
6459
+ {"current_steps": 32215, "total_steps": 38480, "loss": 0.1577, "lr": 3.931069132567991e-06, "epoch": 16.743762993762992, "percentage": 83.72, "elapsed_time": "1:17:31", "remaining_time": "0:15:04", "throughput": 1320.08, "total_tokens": 6140368}
6460
+ {"current_steps": 32220, "total_steps": 38480, "loss": 0.1389, "lr": 3.924967480054256e-06, "epoch": 16.746361746361746, "percentage": 83.73, "elapsed_time": "1:17:32", "remaining_time": "0:15:03", "throughput": 1320.08, "total_tokens": 6141296}
6461
+ {"current_steps": 32225, "total_steps": 38480, "loss": 0.2107, "lr": 3.918870163176924e-06, "epoch": 16.7489604989605, "percentage": 83.74, "elapsed_time": "1:17:32", "remaining_time": "0:15:03", "throughput": 1320.09, "total_tokens": 6142256}
6462
+ {"current_steps": 32230, "total_steps": 38480, "loss": 0.1387, "lr": 3.912777183190369e-06, "epoch": 16.75155925155925, "percentage": 83.76, "elapsed_time": "1:17:33", "remaining_time": "0:15:02", "throughput": 1320.12, "total_tokens": 6143312}
6463
+ {"current_steps": 32235, "total_steps": 38480, "loss": 0.1974, "lr": 3.906688541348055e-06, "epoch": 16.754158004158004, "percentage": 83.77, "elapsed_time": "1:17:34", "remaining_time": "0:15:01", "throughput": 1320.13, "total_tokens": 6144272}
6464
+ {"current_steps": 32240, "total_steps": 38480, "loss": 0.1287, "lr": 3.900604238902556e-06, "epoch": 16.756756756756758, "percentage": 83.78, "elapsed_time": "1:17:35", "remaining_time": "0:15:00", "throughput": 1320.14, "total_tokens": 6145232}
6465
+ {"current_steps": 32245, "total_steps": 38480, "loss": 0.0648, "lr": 3.894524277105565e-06, "epoch": 16.759355509355508, "percentage": 83.8, "elapsed_time": "1:17:35", "remaining_time": "0:15:00", "throughput": 1320.14, "total_tokens": 6146192}
6466
+ {"current_steps": 32250, "total_steps": 38480, "loss": 0.1575, "lr": 3.888448657207883e-06, "epoch": 16.761954261954262, "percentage": 83.81, "elapsed_time": "1:17:36", "remaining_time": "0:14:59", "throughput": 1320.14, "total_tokens": 6147120}
6467
+ {"current_steps": 32255, "total_steps": 38480, "loss": 0.2861, "lr": 3.8823773804593875e-06, "epoch": 16.764553014553016, "percentage": 83.82, "elapsed_time": "1:17:37", "remaining_time": "0:14:58", "throughput": 1320.15, "total_tokens": 6148048}
6468
+ {"current_steps": 32260, "total_steps": 38480, "loss": 0.1042, "lr": 3.876310448109102e-06, "epoch": 16.767151767151766, "percentage": 83.84, "elapsed_time": "1:17:37", "remaining_time": "0:14:58", "throughput": 1320.17, "total_tokens": 6149104}
6469
+ {"current_steps": 32265, "total_steps": 38480, "loss": 0.0464, "lr": 3.8702478614051355e-06, "epoch": 16.76975051975052, "percentage": 83.85, "elapsed_time": "1:17:38", "remaining_time": "0:14:57", "throughput": 1320.18, "total_tokens": 6150032}
6470
+ {"current_steps": 32270, "total_steps": 38480, "loss": 0.1136, "lr": 3.864189621594702e-06, "epoch": 16.772349272349274, "percentage": 83.86, "elapsed_time": "1:17:39", "remaining_time": "0:14:56", "throughput": 1320.18, "total_tokens": 6150992}
6471
+ {"current_steps": 32275, "total_steps": 38480, "loss": 0.0616, "lr": 3.8581357299241255e-06, "epoch": 16.774948024948024, "percentage": 83.87, "elapsed_time": "1:17:39", "remaining_time": "0:14:55", "throughput": 1320.2, "total_tokens": 6151984}
6472
+ {"current_steps": 32280, "total_steps": 38480, "loss": 0.1409, "lr": 3.852086187638846e-06, "epoch": 16.777546777546778, "percentage": 83.89, "elapsed_time": "1:17:40", "remaining_time": "0:14:55", "throughput": 1320.21, "total_tokens": 6152944}
6473
+ {"current_steps": 32285, "total_steps": 38480, "loss": 0.2007, "lr": 3.846040995983391e-06, "epoch": 16.78014553014553, "percentage": 83.9, "elapsed_time": "1:17:41", "remaining_time": "0:14:54", "throughput": 1320.22, "total_tokens": 6153904}
6474
+ {"current_steps": 32290, "total_steps": 38480, "loss": 0.1061, "lr": 3.840000156201401e-06, "epoch": 16.782744282744282, "percentage": 83.91, "elapsed_time": "1:17:41", "remaining_time": "0:14:53", "throughput": 1320.23, "total_tokens": 6154864}
6475
+ {"current_steps": 32295, "total_steps": 38480, "loss": 0.1093, "lr": 3.833963669535615e-06, "epoch": 16.785343035343036, "percentage": 83.93, "elapsed_time": "1:17:42", "remaining_time": "0:14:52", "throughput": 1320.21, "total_tokens": 6155728}
6476
+ {"current_steps": 32300, "total_steps": 38480, "loss": 0.1167, "lr": 3.827931537227894e-06, "epoch": 16.787941787941786, "percentage": 83.94, "elapsed_time": "1:17:43", "remaining_time": "0:14:52", "throughput": 1320.15, "total_tokens": 6156656}
6477
+ {"current_steps": 32305, "total_steps": 38480, "loss": 0.1637, "lr": 3.821903760519188e-06, "epoch": 16.79054054054054, "percentage": 83.95, "elapsed_time": "1:17:44", "remaining_time": "0:14:51", "throughput": 1320.16, "total_tokens": 6157616}
6478
+ {"current_steps": 32310, "total_steps": 38480, "loss": 0.0548, "lr": 3.815880340649549e-06, "epoch": 16.793139293139294, "percentage": 83.97, "elapsed_time": "1:17:44", "remaining_time": "0:14:50", "throughput": 1320.15, "total_tokens": 6158480}
6479
+ {"current_steps": 32315, "total_steps": 38480, "loss": 0.167, "lr": 3.8098612788581427e-06, "epoch": 16.795738045738045, "percentage": 83.98, "elapsed_time": "1:17:45", "remaining_time": "0:14:50", "throughput": 1320.14, "total_tokens": 6159408}
6480
+ {"current_steps": 32320, "total_steps": 38480, "loss": 0.1973, "lr": 3.8038465763832298e-06, "epoch": 16.7983367983368, "percentage": 83.99, "elapsed_time": "1:17:46", "remaining_time": "0:14:49", "throughput": 1320.17, "total_tokens": 6160464}
6481
+ {"current_steps": 32325, "total_steps": 38480, "loss": 0.0351, "lr": 3.797836234462182e-06, "epoch": 16.800935550935552, "percentage": 84.0, "elapsed_time": "1:17:47", "remaining_time": "0:14:48", "throughput": 1320.17, "total_tokens": 6161360}
6482
+ {"current_steps": 32330, "total_steps": 38480, "loss": 0.0806, "lr": 3.7918302543314606e-06, "epoch": 16.803534303534303, "percentage": 84.02, "elapsed_time": "1:17:47", "remaining_time": "0:14:47", "throughput": 1320.17, "total_tokens": 6162288}
6483
+ {"current_steps": 32335, "total_steps": 38480, "loss": 0.1825, "lr": 3.7858286372266515e-06, "epoch": 16.806133056133056, "percentage": 84.03, "elapsed_time": "1:17:48", "remaining_time": "0:14:47", "throughput": 1320.21, "total_tokens": 6163376}
6484
+ {"current_steps": 32340, "total_steps": 38480, "loss": 0.1167, "lr": 3.7798313843824237e-06, "epoch": 16.80873180873181, "percentage": 84.04, "elapsed_time": "1:17:49", "remaining_time": "0:14:46", "throughput": 1320.21, "total_tokens": 6164336}
6485
+ {"current_steps": 32345, "total_steps": 38480, "loss": 0.0291, "lr": 3.7738384970325586e-06, "epoch": 16.81133056133056, "percentage": 84.06, "elapsed_time": "1:17:49", "remaining_time": "0:14:45", "throughput": 1320.21, "total_tokens": 6165232}
6486
+ {"current_steps": 32350, "total_steps": 38480, "loss": 0.1228, "lr": 3.767849976409926e-06, "epoch": 16.813929313929314, "percentage": 84.07, "elapsed_time": "1:17:50", "remaining_time": "0:14:45", "throughput": 1320.22, "total_tokens": 6166192}
6487
+ {"current_steps": 32355, "total_steps": 38480, "loss": 0.1026, "lr": 3.7618658237465216e-06, "epoch": 16.816528066528065, "percentage": 84.08, "elapsed_time": "1:17:51", "remaining_time": "0:14:44", "throughput": 1320.22, "total_tokens": 6167120}
6488
+ {"current_steps": 32360, "total_steps": 38480, "loss": 0.1047, "lr": 3.7558860402734206e-06, "epoch": 16.81912681912682, "percentage": 84.1, "elapsed_time": "1:17:51", "remaining_time": "0:14:43", "throughput": 1320.22, "total_tokens": 6168080}
6489
+ {"current_steps": 32365, "total_steps": 38480, "loss": 0.1601, "lr": 3.749910627220801e-06, "epoch": 16.821725571725572, "percentage": 84.11, "elapsed_time": "1:17:52", "remaining_time": "0:14:42", "throughput": 1320.23, "total_tokens": 6169008}
6490
+ {"current_steps": 32370, "total_steps": 38480, "loss": 0.127, "lr": 3.7439395858179625e-06, "epoch": 16.824324324324323, "percentage": 84.12, "elapsed_time": "1:17:53", "remaining_time": "0:14:42", "throughput": 1320.25, "total_tokens": 6170032}
6491
+ {"current_steps": 32375, "total_steps": 38480, "loss": 0.0919, "lr": 3.737972917293281e-06, "epoch": 16.826923076923077, "percentage": 84.13, "elapsed_time": "1:17:54", "remaining_time": "0:14:41", "throughput": 1320.26, "total_tokens": 6170992}
6492
+ {"current_steps": 32380, "total_steps": 38480, "loss": 0.128, "lr": 3.7320106228742462e-06, "epoch": 16.82952182952183, "percentage": 84.15, "elapsed_time": "1:17:54", "remaining_time": "0:14:40", "throughput": 1320.25, "total_tokens": 6171888}
6493
+ {"current_steps": 32385, "total_steps": 38480, "loss": 0.1545, "lr": 3.726052703787439e-06, "epoch": 16.83212058212058, "percentage": 84.16, "elapsed_time": "1:17:55", "remaining_time": "0:14:39", "throughput": 1320.26, "total_tokens": 6172848}
6494
+ {"current_steps": 32390, "total_steps": 38480, "loss": 0.1242, "lr": 3.72009916125855e-06, "epoch": 16.834719334719335, "percentage": 84.17, "elapsed_time": "1:17:56", "remaining_time": "0:14:39", "throughput": 1320.26, "total_tokens": 6173776}
6495
+ {"current_steps": 32395, "total_steps": 38480, "loss": 0.2496, "lr": 3.7141499965123616e-06, "epoch": 16.83731808731809, "percentage": 84.19, "elapsed_time": "1:17:56", "remaining_time": "0:14:38", "throughput": 1320.27, "total_tokens": 6174768}
6496
+ {"current_steps": 32400, "total_steps": 38480, "loss": 0.1342, "lr": 3.708205210772753e-06, "epoch": 16.83991683991684, "percentage": 84.2, "elapsed_time": "1:17:57", "remaining_time": "0:14:37", "throughput": 1320.28, "total_tokens": 6175728}
6497
+ {"current_steps": 32405, "total_steps": 38480, "loss": 0.1701, "lr": 3.7022648052627228e-06, "epoch": 16.842515592515593, "percentage": 84.21, "elapsed_time": "1:17:58", "remaining_time": "0:14:37", "throughput": 1320.28, "total_tokens": 6176624}
6498
+ {"current_steps": 32410, "total_steps": 38480, "loss": 0.0665, "lr": 3.6963287812043473e-06, "epoch": 16.845114345114347, "percentage": 84.23, "elapsed_time": "1:17:58", "remaining_time": "0:14:36", "throughput": 1320.28, "total_tokens": 6177584}
6499
+ {"current_steps": 32415, "total_steps": 38480, "loss": 0.1164, "lr": 3.6903971398188074e-06, "epoch": 16.847713097713097, "percentage": 84.24, "elapsed_time": "1:17:59", "remaining_time": "0:14:35", "throughput": 1320.29, "total_tokens": 6178512}
6500
+ {"current_steps": 32420, "total_steps": 38480, "loss": 0.1096, "lr": 3.6844698823263748e-06, "epoch": 16.85031185031185, "percentage": 84.25, "elapsed_time": "1:18:00", "remaining_time": "0:14:34", "throughput": 1320.29, "total_tokens": 6179440}
6501
+ {"current_steps": 32425, "total_steps": 38480, "loss": 0.0788, "lr": 3.678547009946445e-06, "epoch": 16.852910602910605, "percentage": 84.26, "elapsed_time": "1:18:01", "remaining_time": "0:14:34", "throughput": 1320.28, "total_tokens": 6180336}
6502
+ {"current_steps": 32430, "total_steps": 38480, "loss": 0.103, "lr": 3.672628523897481e-06, "epoch": 16.855509355509355, "percentage": 84.28, "elapsed_time": "1:18:01", "remaining_time": "0:14:33", "throughput": 1320.28, "total_tokens": 6181264}
6503
+ {"current_steps": 32435, "total_steps": 38480, "loss": 0.0706, "lr": 3.6667144253970637e-06, "epoch": 16.85810810810811, "percentage": 84.29, "elapsed_time": "1:18:02", "remaining_time": "0:14:32", "throughput": 1320.28, "total_tokens": 6182192}
6504
+ {"current_steps": 32440, "total_steps": 38480, "loss": 0.1193, "lr": 3.660804715661853e-06, "epoch": 16.86070686070686, "percentage": 84.3, "elapsed_time": "1:18:03", "remaining_time": "0:14:31", "throughput": 1320.29, "total_tokens": 6183152}
6505
+ {"current_steps": 32445, "total_steps": 38480, "loss": 0.1728, "lr": 3.654899395907632e-06, "epoch": 16.863305613305613, "percentage": 84.32, "elapsed_time": "1:18:03", "remaining_time": "0:14:31", "throughput": 1320.31, "total_tokens": 6184144}
6506
+ {"current_steps": 32450, "total_steps": 38480, "loss": 0.1906, "lr": 3.6489984673492628e-06, "epoch": 16.865904365904367, "percentage": 84.33, "elapsed_time": "1:18:04", "remaining_time": "0:14:30", "throughput": 1320.31, "total_tokens": 6185104}
6507
+ {"current_steps": 32455, "total_steps": 38480, "loss": 0.1998, "lr": 3.6431019312006903e-06, "epoch": 16.868503118503117, "percentage": 84.34, "elapsed_time": "1:18:05", "remaining_time": "0:14:29", "throughput": 1320.32, "total_tokens": 6186032}
6508
+ {"current_steps": 32460, "total_steps": 38480, "loss": 0.1578, "lr": 3.6372097886749917e-06, "epoch": 16.87110187110187, "percentage": 84.36, "elapsed_time": "1:18:05", "remaining_time": "0:14:29", "throughput": 1320.34, "total_tokens": 6187056}
6509
+ {"current_steps": 32465, "total_steps": 38480, "loss": 0.0454, "lr": 3.631322040984317e-06, "epoch": 16.873700623700625, "percentage": 84.37, "elapsed_time": "1:18:06", "remaining_time": "0:14:28", "throughput": 1320.35, "total_tokens": 6188048}
6510
+ {"current_steps": 32470, "total_steps": 38480, "loss": 0.1423, "lr": 3.6254386893399077e-06, "epoch": 16.876299376299375, "percentage": 84.38, "elapsed_time": "1:18:07", "remaining_time": "0:14:27", "throughput": 1320.36, "total_tokens": 6189008}
6511
+ {"current_steps": 32475, "total_steps": 38480, "loss": 0.1369, "lr": 3.619559734952113e-06, "epoch": 16.87889812889813, "percentage": 84.39, "elapsed_time": "1:18:08", "remaining_time": "0:14:26", "throughput": 1320.37, "total_tokens": 6189968}
6512
+ {"current_steps": 32480, "total_steps": 38480, "loss": 0.0861, "lr": 3.613685179030382e-06, "epoch": 16.881496881496883, "percentage": 84.41, "elapsed_time": "1:18:08", "remaining_time": "0:14:26", "throughput": 1320.36, "total_tokens": 6190864}
6513
+ {"current_steps": 32485, "total_steps": 38480, "loss": 0.1452, "lr": 3.607815022783245e-06, "epoch": 16.884095634095633, "percentage": 84.42, "elapsed_time": "1:18:09", "remaining_time": "0:14:25", "throughput": 1320.39, "total_tokens": 6191920}
6514
+ {"current_steps": 32490, "total_steps": 38480, "loss": 0.2113, "lr": 3.6019492674183253e-06, "epoch": 16.886694386694387, "percentage": 84.43, "elapsed_time": "1:18:10", "remaining_time": "0:14:24", "throughput": 1320.4, "total_tokens": 6192848}
6515
+ {"current_steps": 32495, "total_steps": 38480, "loss": 0.0505, "lr": 3.5960879141423627e-06, "epoch": 16.88929313929314, "percentage": 84.45, "elapsed_time": "1:18:10", "remaining_time": "0:14:23", "throughput": 1320.41, "total_tokens": 6193840}
6516
+ {"current_steps": 32500, "total_steps": 38480, "loss": 0.0207, "lr": 3.5902309641611705e-06, "epoch": 16.89189189189189, "percentage": 84.46, "elapsed_time": "1:18:11", "remaining_time": "0:14:23", "throughput": 1320.41, "total_tokens": 6194768}
6517
+ {"current_steps": 32505, "total_steps": 38480, "loss": 0.1159, "lr": 3.5843784186796654e-06, "epoch": 16.894490644490645, "percentage": 84.47, "elapsed_time": "1:18:12", "remaining_time": "0:14:22", "throughput": 1320.42, "total_tokens": 6195728}
6518
+ {"current_steps": 32510, "total_steps": 38480, "loss": 0.1147, "lr": 3.5785302789018454e-06, "epoch": 16.897089397089395, "percentage": 84.49, "elapsed_time": "1:18:12", "remaining_time": "0:14:21", "throughput": 1320.42, "total_tokens": 6196656}
6519
+ {"current_steps": 32515, "total_steps": 38480, "loss": 0.1095, "lr": 3.572686546030832e-06, "epoch": 16.89968814968815, "percentage": 84.5, "elapsed_time": "1:18:13", "remaining_time": "0:14:21", "throughput": 1320.42, "total_tokens": 6197584}
6520
+ {"current_steps": 32520, "total_steps": 38480, "loss": 0.1317, "lr": 3.5668472212688016e-06, "epoch": 16.902286902286903, "percentage": 84.51, "elapsed_time": "1:18:14", "remaining_time": "0:14:20", "throughput": 1320.43, "total_tokens": 6198544}
6521
+ {"current_steps": 32525, "total_steps": 38480, "loss": 0.1305, "lr": 3.5610123058170437e-06, "epoch": 16.904885654885653, "percentage": 84.52, "elapsed_time": "1:18:15", "remaining_time": "0:14:19", "throughput": 1320.44, "total_tokens": 6199536}
6522
+ {"current_steps": 32530, "total_steps": 38480, "loss": 0.1927, "lr": 3.5551818008759506e-06, "epoch": 16.907484407484407, "percentage": 84.54, "elapsed_time": "1:18:15", "remaining_time": "0:14:18", "throughput": 1320.46, "total_tokens": 6200528}
6523
+ {"current_steps": 32535, "total_steps": 38480, "loss": 0.1262, "lr": 3.5493557076449912e-06, "epoch": 16.91008316008316, "percentage": 84.55, "elapsed_time": "1:18:16", "remaining_time": "0:14:18", "throughput": 1320.48, "total_tokens": 6201552}
6524
+ {"current_steps": 32540, "total_steps": 38480, "loss": 0.1613, "lr": 3.5435340273227313e-06, "epoch": 16.91268191268191, "percentage": 84.56, "elapsed_time": "1:18:17", "remaining_time": "0:14:17", "throughput": 1320.48, "total_tokens": 6202480}
6525
+ {"current_steps": 32545, "total_steps": 38480, "loss": 0.2923, "lr": 3.537716761106821e-06, "epoch": 16.915280665280665, "percentage": 84.58, "elapsed_time": "1:18:17", "remaining_time": "0:14:16", "throughput": 1320.5, "total_tokens": 6203504}
6526
+ {"current_steps": 32550, "total_steps": 38480, "loss": 0.1845, "lr": 3.531903910194026e-06, "epoch": 16.91787941787942, "percentage": 84.59, "elapsed_time": "1:18:18", "remaining_time": "0:14:15", "throughput": 1320.51, "total_tokens": 6204464}
6527
+ {"current_steps": 32555, "total_steps": 38480, "loss": 0.1912, "lr": 3.5260954757801807e-06, "epoch": 16.92047817047817, "percentage": 84.6, "elapsed_time": "1:18:19", "remaining_time": "0:14:15", "throughput": 1320.53, "total_tokens": 6205488}
6528
+ {"current_steps": 32560, "total_steps": 38480, "loss": 0.1414, "lr": 3.520291459060218e-06, "epoch": 16.923076923076923, "percentage": 84.62, "elapsed_time": "1:18:19", "remaining_time": "0:14:14", "throughput": 1320.55, "total_tokens": 6206512}
6529
+ {"current_steps": 32565, "total_steps": 38480, "loss": 0.1104, "lr": 3.5144918612281585e-06, "epoch": 16.925675675675677, "percentage": 84.63, "elapsed_time": "1:18:20", "remaining_time": "0:14:13", "throughput": 1320.56, "total_tokens": 6207472}
6530
+ {"current_steps": 32570, "total_steps": 38480, "loss": 0.1088, "lr": 3.508696683477128e-06, "epoch": 16.928274428274428, "percentage": 84.64, "elapsed_time": "1:18:21", "remaining_time": "0:14:13", "throughput": 1320.57, "total_tokens": 6208432}
6531
+ {"current_steps": 32575, "total_steps": 38480, "loss": 0.1685, "lr": 3.5029059269993253e-06, "epoch": 16.93087318087318, "percentage": 84.65, "elapsed_time": "1:18:22", "remaining_time": "0:14:12", "throughput": 1320.57, "total_tokens": 6209392}
6532
+ {"current_steps": 32580, "total_steps": 38480, "loss": 0.1877, "lr": 3.497119592986051e-06, "epoch": 16.933471933471935, "percentage": 84.67, "elapsed_time": "1:18:22", "remaining_time": "0:14:11", "throughput": 1320.57, "total_tokens": 6210320}
6533
+ {"current_steps": 32585, "total_steps": 38480, "loss": 0.0329, "lr": 3.491337682627685e-06, "epoch": 16.936070686070686, "percentage": 84.68, "elapsed_time": "1:18:23", "remaining_time": "0:14:10", "throughput": 1320.58, "total_tokens": 6211280}
6534
+ {"current_steps": 32590, "total_steps": 38480, "loss": 0.09, "lr": 3.485560197113713e-06, "epoch": 16.93866943866944, "percentage": 84.69, "elapsed_time": "1:18:24", "remaining_time": "0:14:10", "throughput": 1320.59, "total_tokens": 6212240}
6535
+ {"current_steps": 32595, "total_steps": 38480, "loss": 0.1311, "lr": 3.4797871376326925e-06, "epoch": 16.94126819126819, "percentage": 84.71, "elapsed_time": "1:18:24", "remaining_time": "0:14:09", "throughput": 1320.6, "total_tokens": 6213200}
6536
+ {"current_steps": 32600, "total_steps": 38480, "loss": 0.1317, "lr": 3.4740185053722814e-06, "epoch": 16.943866943866944, "percentage": 84.72, "elapsed_time": "1:18:25", "remaining_time": "0:14:08", "throughput": 1320.58, "total_tokens": 6214064}
6537
+ {"current_steps": 32605, "total_steps": 38480, "loss": 0.1235, "lr": 3.4682543015192333e-06, "epoch": 16.946465696465697, "percentage": 84.73, "elapsed_time": "1:18:26", "remaining_time": "0:14:08", "throughput": 1320.6, "total_tokens": 6215056}
6538
+ {"current_steps": 32610, "total_steps": 38480, "loss": 0.1719, "lr": 3.4624945272593747e-06, "epoch": 16.949064449064448, "percentage": 84.75, "elapsed_time": "1:18:26", "remaining_time": "0:14:07", "throughput": 1320.61, "total_tokens": 6216048}
6539
+ {"current_steps": 32615, "total_steps": 38480, "loss": 0.1252, "lr": 3.4567391837776243e-06, "epoch": 16.9516632016632, "percentage": 84.76, "elapsed_time": "1:18:27", "remaining_time": "0:14:06", "throughput": 1320.65, "total_tokens": 6217136}
6540
+ {"current_steps": 32620, "total_steps": 38480, "loss": 0.1547, "lr": 3.4509882722580044e-06, "epoch": 16.954261954261955, "percentage": 84.77, "elapsed_time": "1:18:28", "remaining_time": "0:14:05", "throughput": 1320.65, "total_tokens": 6218064}
6541
+ {"current_steps": 32625, "total_steps": 38480, "loss": 0.1108, "lr": 3.4452417938836107e-06, "epoch": 16.956860706860706, "percentage": 84.78, "elapsed_time": "1:18:29", "remaining_time": "0:14:05", "throughput": 1320.64, "total_tokens": 6218960}
6542
+ {"current_steps": 32630, "total_steps": 38480, "loss": 0.0554, "lr": 3.4394997498366277e-06, "epoch": 16.95945945945946, "percentage": 84.8, "elapsed_time": "1:18:29", "remaining_time": "0:14:04", "throughput": 1320.66, "total_tokens": 6219984}
6543
+ {"current_steps": 32635, "total_steps": 38480, "loss": 0.1209, "lr": 3.4337621412983274e-06, "epoch": 16.962058212058214, "percentage": 84.81, "elapsed_time": "1:18:30", "remaining_time": "0:14:03", "throughput": 1320.67, "total_tokens": 6220944}
6544
+ {"current_steps": 32640, "total_steps": 38480, "loss": 0.1821, "lr": 3.428028969449082e-06, "epoch": 16.964656964656964, "percentage": 84.82, "elapsed_time": "1:18:31", "remaining_time": "0:14:02", "throughput": 1320.68, "total_tokens": 6221936}
6545
+ {"current_steps": 32645, "total_steps": 38480, "loss": 0.0836, "lr": 3.422300235468345e-06, "epoch": 16.967255717255718, "percentage": 84.84, "elapsed_time": "1:18:31", "remaining_time": "0:14:02", "throughput": 1320.7, "total_tokens": 6222928}
6546
+ {"current_steps": 32650, "total_steps": 38480, "loss": 0.0751, "lr": 3.4165759405346303e-06, "epoch": 16.96985446985447, "percentage": 84.85, "elapsed_time": "1:18:32", "remaining_time": "0:14:01", "throughput": 1320.71, "total_tokens": 6223920}
6547
+ {"current_steps": 32655, "total_steps": 38480, "loss": 0.1177, "lr": 3.4108560858255866e-06, "epoch": 16.972453222453222, "percentage": 84.86, "elapsed_time": "1:18:33", "remaining_time": "0:14:00", "throughput": 1320.72, "total_tokens": 6224880}
6548
+ {"current_steps": 32660, "total_steps": 38480, "loss": 0.098, "lr": 3.405140672517912e-06, "epoch": 16.975051975051976, "percentage": 84.88, "elapsed_time": "1:18:33", "remaining_time": "0:14:00", "throughput": 1320.73, "total_tokens": 6225840}
6549
+ {"current_steps": 32665, "total_steps": 38480, "loss": 0.1018, "lr": 3.399429701787407e-06, "epoch": 16.977650727650726, "percentage": 84.89, "elapsed_time": "1:18:34", "remaining_time": "0:13:59", "throughput": 1320.73, "total_tokens": 6226800}
6550
+ {"current_steps": 32670, "total_steps": 38480, "loss": 0.1344, "lr": 3.3937231748089493e-06, "epoch": 16.98024948024948, "percentage": 84.9, "elapsed_time": "1:18:35", "remaining_time": "0:13:58", "throughput": 1320.75, "total_tokens": 6227792}
6551
+ {"current_steps": 32675, "total_steps": 38480, "loss": 0.1756, "lr": 3.388021092756516e-06, "epoch": 16.982848232848234, "percentage": 84.91, "elapsed_time": "1:18:36", "remaining_time": "0:13:57", "throughput": 1320.76, "total_tokens": 6228784}
6552
+ {"current_steps": 32680, "total_steps": 38480, "loss": 0.1885, "lr": 3.3823234568031547e-06, "epoch": 16.985446985446984, "percentage": 84.93, "elapsed_time": "1:18:36", "remaining_time": "0:13:57", "throughput": 1320.75, "total_tokens": 6229648}
6553
+ {"current_steps": 32685, "total_steps": 38480, "loss": 0.2153, "lr": 3.376630268121009e-06, "epoch": 16.988045738045738, "percentage": 84.94, "elapsed_time": "1:18:37", "remaining_time": "0:13:56", "throughput": 1320.75, "total_tokens": 6230576}
6554
+ {"current_steps": 32690, "total_steps": 38480, "loss": 0.1301, "lr": 3.370941527881297e-06, "epoch": 16.990644490644492, "percentage": 84.95, "elapsed_time": "1:18:38", "remaining_time": "0:13:55", "throughput": 1320.74, "total_tokens": 6231472}
6555
+ {"current_steps": 32695, "total_steps": 38480, "loss": 0.1214, "lr": 3.365257237254335e-06, "epoch": 16.993243243243242, "percentage": 84.97, "elapsed_time": "1:18:38", "remaining_time": "0:13:54", "throughput": 1320.75, "total_tokens": 6232432}
6556
+ {"current_steps": 32700, "total_steps": 38480, "loss": 0.0362, "lr": 3.3595773974095163e-06, "epoch": 16.995841995841996, "percentage": 84.98, "elapsed_time": "1:18:39", "remaining_time": "0:13:54", "throughput": 1320.76, "total_tokens": 6233392}
6557
+ {"current_steps": 32705, "total_steps": 38480, "loss": 0.1196, "lr": 3.353902009515317e-06, "epoch": 16.99844074844075, "percentage": 84.99, "elapsed_time": "1:18:40", "remaining_time": "0:13:53", "throughput": 1320.78, "total_tokens": 6234384}
6558
+ {"current_steps": 32708, "total_steps": 38480, "eval_loss": 0.14667558670043945, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "1:18:48", "remaining_time": "0:13:54", "throughput": 1318.52, "total_tokens": 6234920}
6559
+ {"current_steps": 32710, "total_steps": 38480, "loss": 0.1168, "lr": 3.3482310747393e-06, "epoch": 17.0010395010395, "percentage": 85.01, "elapsed_time": "1:18:50", "remaining_time": "0:13:54", "throughput": 1318.21, "total_tokens": 6235336}
6560
+ {"current_steps": 32715, "total_steps": 38480, "loss": 0.1974, "lr": 3.3425645942481126e-06, "epoch": 17.003638253638254, "percentage": 85.02, "elapsed_time": "1:18:50", "remaining_time": "0:13:53", "throughput": 1318.17, "total_tokens": 6236264}
6561
+ {"current_steps": 32720, "total_steps": 38480, "loss": 0.0464, "lr": 3.336902569207484e-06, "epoch": 17.006237006237008, "percentage": 85.03, "elapsed_time": "1:18:51", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6237224}