rbelanec commited on
Commit
2ab144e
·
verified ·
1 Parent(s): feff059

Training in progress, step 16520

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +165 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cdf053bce4a09b556887d86be29e6556aba5cb876d1fde71b6ca47a2d74d3df
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e653316ed51c5d355ef66e5613c89d11c1e8b5731b7db406a01e8fb3f0e4c9a0
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -3158,3 +3158,168 @@
3158
  {"current_steps": 15695, "total_steps": 16520, "loss": 0.0011, "lr": 7.59612349389599e-06, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:51:48", "remaining_time": "0:02:43", "throughput": 2068.41, "total_tokens": 6429832}
3159
  {"current_steps": 15700, "total_steps": 16520, "loss": 0.0008, "lr": 7.50466912679143e-06, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:51:49", "remaining_time": "0:02:42", "throughput": 2068.41, "total_tokens": 6431848}
3160
  {"current_steps": 15705, "total_steps": 16520, "loss": 0.0019, "lr": 7.413764473596596e-06, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:51:50", "remaining_time": "0:02:41", "throughput": 2068.45, "total_tokens": 6433960}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3158
  {"current_steps": 15695, "total_steps": 16520, "loss": 0.0011, "lr": 7.59612349389599e-06, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:51:48", "remaining_time": "0:02:43", "throughput": 2068.41, "total_tokens": 6429832}
3159
  {"current_steps": 15700, "total_steps": 16520, "loss": 0.0008, "lr": 7.50466912679143e-06, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:51:49", "remaining_time": "0:02:42", "throughput": 2068.41, "total_tokens": 6431848}
3160
  {"current_steps": 15705, "total_steps": 16520, "loss": 0.0019, "lr": 7.413764473596596e-06, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:51:50", "remaining_time": "0:02:41", "throughput": 2068.45, "total_tokens": 6433960}
3161
+ {"current_steps": 15710, "total_steps": 16520, "loss": 0.0011, "lr": 7.3234096357775444e-06, "epoch": 19.019370460048425, "percentage": 95.1, "elapsed_time": "0:51:51", "remaining_time": "0:02:40", "throughput": 2068.47, "total_tokens": 6435944}
3162
+ {"current_steps": 15715, "total_steps": 16520, "loss": 0.0004, "lr": 7.233604714186659e-06, "epoch": 19.02542372881356, "percentage": 95.13, "elapsed_time": "0:51:52", "remaining_time": "0:02:39", "throughput": 2068.5, "total_tokens": 6437992}
3163
+ {"current_steps": 15720, "total_steps": 16520, "loss": 0.0005, "lr": 7.144349809062422e-06, "epoch": 19.031476997578693, "percentage": 95.16, "elapsed_time": "0:51:53", "remaining_time": "0:02:38", "throughput": 2068.54, "total_tokens": 6440104}
3164
+ {"current_steps": 15725, "total_steps": 16520, "loss": 0.0004, "lr": 7.055645020029533e-06, "epoch": 19.037530266343826, "percentage": 95.19, "elapsed_time": "0:51:54", "remaining_time": "0:02:37", "throughput": 2068.56, "total_tokens": 6442152}
3165
+ {"current_steps": 15730, "total_steps": 16520, "loss": 0.0013, "lr": 6.967490446098457e-06, "epoch": 19.043583535108958, "percentage": 95.22, "elapsed_time": "0:51:55", "remaining_time": "0:02:36", "throughput": 2068.6, "total_tokens": 6444200}
3166
+ {"current_steps": 15735, "total_steps": 16520, "loss": 0.0008, "lr": 6.879886185665818e-06, "epoch": 19.04963680387409, "percentage": 95.25, "elapsed_time": "0:51:56", "remaining_time": "0:02:35", "throughput": 2068.64, "total_tokens": 6446216}
3167
+ {"current_steps": 15740, "total_steps": 16520, "loss": 0.0006, "lr": 6.792832336513732e-06, "epoch": 19.055690072639226, "percentage": 95.28, "elapsed_time": "0:51:57", "remaining_time": "0:02:34", "throughput": 2068.7, "total_tokens": 6448360}
3168
+ {"current_steps": 15745, "total_steps": 16520, "loss": 0.0005, "lr": 6.7063289958099735e-06, "epoch": 19.06174334140436, "percentage": 95.31, "elapsed_time": "0:51:58", "remaining_time": "0:02:33", "throughput": 2068.74, "total_tokens": 6450440}
3169
+ {"current_steps": 15750, "total_steps": 16520, "loss": 0.002, "lr": 6.620376260108141e-06, "epoch": 19.06779661016949, "percentage": 95.34, "elapsed_time": "0:51:59", "remaining_time": "0:02:32", "throughput": 2068.79, "total_tokens": 6452552}
3170
+ {"current_steps": 15755, "total_steps": 16520, "loss": 0.0005, "lr": 6.534974225346824e-06, "epoch": 19.073849878934624, "percentage": 95.37, "elapsed_time": "0:51:59", "remaining_time": "0:02:31", "throughput": 2068.8, "total_tokens": 6454568}
3171
+ {"current_steps": 15760, "total_steps": 16520, "loss": 0.0014, "lr": 6.450122986850216e-06, "epoch": 19.079903147699756, "percentage": 95.4, "elapsed_time": "0:52:00", "remaining_time": "0:02:30", "throughput": 2068.81, "total_tokens": 6456552}
3172
+ {"current_steps": 15765, "total_steps": 16520, "loss": 0.0007, "lr": 6.365822639327723e-06, "epoch": 19.085956416464892, "percentage": 95.43, "elapsed_time": "0:52:01", "remaining_time": "0:02:29", "throughput": 2068.83, "total_tokens": 6458600}
3173
+ {"current_steps": 15770, "total_steps": 16520, "loss": 0.0009, "lr": 6.2820732768736895e-06, "epoch": 19.092009685230025, "percentage": 95.46, "elapsed_time": "0:52:02", "remaining_time": "0:02:28", "throughput": 2068.83, "total_tokens": 6460552}
3174
+ {"current_steps": 15775, "total_steps": 16520, "loss": 0.0351, "lr": 6.198874992967673e-06, "epoch": 19.098062953995157, "percentage": 95.49, "elapsed_time": "0:52:03", "remaining_time": "0:02:27", "throughput": 2068.86, "total_tokens": 6462600}
3175
+ {"current_steps": 15780, "total_steps": 16520, "loss": 0.0007, "lr": 6.11622788047389e-06, "epoch": 19.10411622276029, "percentage": 95.52, "elapsed_time": "0:52:04", "remaining_time": "0:02:26", "throughput": 2068.88, "total_tokens": 6464520}
3176
+ {"current_steps": 15785, "total_steps": 16520, "loss": 0.0006, "lr": 6.034132031641548e-06, "epoch": 19.110169491525422, "percentage": 95.55, "elapsed_time": "0:52:05", "remaining_time": "0:02:25", "throughput": 2068.91, "total_tokens": 6466440}
3177
+ {"current_steps": 15790, "total_steps": 16520, "loss": 0.0012, "lr": 5.9525875381044035e-06, "epoch": 19.116222760290558, "percentage": 95.58, "elapsed_time": "0:52:06", "remaining_time": "0:02:24", "throughput": 2068.91, "total_tokens": 6468424}
3178
+ {"current_steps": 15795, "total_steps": 16520, "loss": 0.001, "lr": 5.871594490880927e-06, "epoch": 19.12227602905569, "percentage": 95.61, "elapsed_time": "0:52:07", "remaining_time": "0:02:23", "throughput": 2068.96, "total_tokens": 6470472}
3179
+ {"current_steps": 15800, "total_steps": 16520, "loss": 0.0018, "lr": 5.791152980374082e-06, "epoch": 19.128329297820823, "percentage": 95.64, "elapsed_time": "0:52:08", "remaining_time": "0:02:22", "throughput": 2069.0, "total_tokens": 6472456}
3180
+ {"current_steps": 15805, "total_steps": 16520, "loss": 0.0005, "lr": 5.711263096370989e-06, "epoch": 19.134382566585955, "percentage": 95.67, "elapsed_time": "0:52:09", "remaining_time": "0:02:21", "throughput": 2069.01, "total_tokens": 6474344}
3181
+ {"current_steps": 15810, "total_steps": 16520, "loss": 0.0008, "lr": 5.631924928043319e-06, "epoch": 19.140435835351088, "percentage": 95.7, "elapsed_time": "0:52:10", "remaining_time": "0:02:20", "throughput": 2069.03, "total_tokens": 6476168}
3182
+ {"current_steps": 15815, "total_steps": 16520, "loss": 0.0008, "lr": 5.553138563946847e-06, "epoch": 19.146489104116224, "percentage": 95.73, "elapsed_time": "0:52:11", "remaining_time": "0:02:19", "throughput": 2069.08, "total_tokens": 6478312}
3183
+ {"current_steps": 15820, "total_steps": 16520, "loss": 0.0008, "lr": 5.4749040920212824e-06, "epoch": 19.152542372881356, "percentage": 95.76, "elapsed_time": "0:52:11", "remaining_time": "0:02:18", "throughput": 2069.12, "total_tokens": 6480360}
3184
+ {"current_steps": 15825, "total_steps": 16520, "loss": 0.0007, "lr": 5.397221599590607e-06, "epoch": 19.15859564164649, "percentage": 95.79, "elapsed_time": "0:52:12", "remaining_time": "0:02:17", "throughput": 2069.18, "total_tokens": 6482536}
3185
+ {"current_steps": 15830, "total_steps": 16520, "loss": 0.0008, "lr": 5.3200911733624625e-06, "epoch": 19.16464891041162, "percentage": 95.82, "elapsed_time": "0:52:13", "remaining_time": "0:02:16", "throughput": 2069.22, "total_tokens": 6484584}
3186
+ {"current_steps": 15835, "total_steps": 16520, "loss": 0.0009, "lr": 5.243512899428315e-06, "epoch": 19.170702179176754, "percentage": 95.85, "elapsed_time": "0:52:14", "remaining_time": "0:02:15", "throughput": 2069.3, "total_tokens": 6486888}
3187
+ {"current_steps": 15840, "total_steps": 16520, "loss": 0.0008, "lr": 5.167486863263515e-06, "epoch": 19.17675544794189, "percentage": 95.88, "elapsed_time": "0:52:15", "remaining_time": "0:02:14", "throughput": 2069.3, "total_tokens": 6488840}
3188
+ {"current_steps": 15845, "total_steps": 16520, "loss": 0.0009, "lr": 5.092013149726737e-06, "epoch": 19.182808716707022, "percentage": 95.91, "elapsed_time": "0:52:16", "remaining_time": "0:02:13", "throughput": 2069.31, "total_tokens": 6490696}
3189
+ {"current_steps": 15850, "total_steps": 16520, "loss": 0.0004, "lr": 5.017091843060317e-06, "epoch": 19.188861985472155, "percentage": 95.94, "elapsed_time": "0:52:17", "remaining_time": "0:02:12", "throughput": 2069.35, "total_tokens": 6492808}
3190
+ {"current_steps": 15855, "total_steps": 16520, "loss": 0.0013, "lr": 4.942723026890139e-06, "epoch": 19.194915254237287, "percentage": 95.97, "elapsed_time": "0:52:18", "remaining_time": "0:02:11", "throughput": 2069.38, "total_tokens": 6494760}
3191
+ {"current_steps": 15860, "total_steps": 16520, "loss": 0.0007, "lr": 4.86890678422508e-06, "epoch": 19.20096852300242, "percentage": 96.0, "elapsed_time": "0:52:19", "remaining_time": "0:02:10", "throughput": 2069.42, "total_tokens": 6496840}
3192
+ {"current_steps": 15865, "total_steps": 16520, "loss": 0.0006, "lr": 4.795643197457511e-06, "epoch": 19.207021791767556, "percentage": 96.04, "elapsed_time": "0:52:20", "remaining_time": "0:02:09", "throughput": 2069.47, "total_tokens": 6498920}
3193
+ {"current_steps": 15870, "total_steps": 16520, "loss": 0.0008, "lr": 4.722932348362851e-06, "epoch": 19.213075060532688, "percentage": 96.07, "elapsed_time": "0:52:21", "remaining_time": "0:02:08", "throughput": 2069.49, "total_tokens": 6500968}
3194
+ {"current_steps": 15875, "total_steps": 16520, "loss": 0.0005, "lr": 4.650774318099571e-06, "epoch": 19.21912832929782, "percentage": 96.1, "elapsed_time": "0:52:22", "remaining_time": "0:02:07", "throughput": 2069.55, "total_tokens": 6503144}
3195
+ {"current_steps": 15880, "total_steps": 16520, "loss": 0.001, "lr": 4.57916918720902e-06, "epoch": 19.225181598062953, "percentage": 96.13, "elapsed_time": "0:52:23", "remaining_time": "0:02:06", "throughput": 2069.6, "total_tokens": 6505224}
3196
+ {"current_steps": 15885, "total_steps": 16520, "loss": 0.0014, "lr": 4.5081170356156e-06, "epoch": 19.231234866828085, "percentage": 96.16, "elapsed_time": "0:52:24", "remaining_time": "0:02:05", "throughput": 2069.62, "total_tokens": 6507176}
3197
+ {"current_steps": 15890, "total_steps": 16520, "loss": 0.0014, "lr": 4.4376179426262595e-06, "epoch": 19.23728813559322, "percentage": 96.19, "elapsed_time": "0:52:25", "remaining_time": "0:02:04", "throughput": 2069.65, "total_tokens": 6509256}
3198
+ {"current_steps": 15895, "total_steps": 16520, "loss": 0.002, "lr": 4.367671986930777e-06, "epoch": 19.243341404358354, "percentage": 96.22, "elapsed_time": "0:52:26", "remaining_time": "0:02:03", "throughput": 2069.59, "total_tokens": 6511144}
3199
+ {"current_steps": 15900, "total_steps": 16520, "loss": 0.0027, "lr": 4.298279246601533e-06, "epoch": 19.249394673123486, "percentage": 96.25, "elapsed_time": "0:52:27", "remaining_time": "0:02:02", "throughput": 2069.62, "total_tokens": 6513128}
3200
+ {"current_steps": 15905, "total_steps": 16520, "loss": 0.0019, "lr": 4.229439799093293e-06, "epoch": 19.25544794188862, "percentage": 96.28, "elapsed_time": "0:52:27", "remaining_time": "0:02:01", "throughput": 2069.63, "total_tokens": 6515080}
3201
+ {"current_steps": 15910, "total_steps": 16520, "loss": 0.0009, "lr": 4.161153721243482e-06, "epoch": 19.26150121065375, "percentage": 96.31, "elapsed_time": "0:52:28", "remaining_time": "0:02:00", "throughput": 2069.68, "total_tokens": 6517192}
3202
+ {"current_steps": 15915, "total_steps": 16520, "loss": 0.0006, "lr": 4.093421089271576e-06, "epoch": 19.267554479418887, "percentage": 96.34, "elapsed_time": "0:52:29", "remaining_time": "0:01:59", "throughput": 2069.72, "total_tokens": 6519240}
3203
+ {"current_steps": 15920, "total_steps": 16520, "loss": 0.0008, "lr": 4.026241978779599e-06, "epoch": 19.27360774818402, "percentage": 96.37, "elapsed_time": "0:52:30", "remaining_time": "0:01:58", "throughput": 2069.74, "total_tokens": 6521256}
3204
+ {"current_steps": 15925, "total_steps": 16520, "loss": 0.0004, "lr": 3.9596164647514035e-06, "epoch": 19.279661016949152, "percentage": 96.4, "elapsed_time": "0:52:31", "remaining_time": "0:01:57", "throughput": 2069.78, "total_tokens": 6523400}
3205
+ {"current_steps": 15930, "total_steps": 16520, "loss": 0.0016, "lr": 3.89354462155328e-06, "epoch": 19.285714285714285, "percentage": 96.43, "elapsed_time": "0:52:32", "remaining_time": "0:01:56", "throughput": 2069.81, "total_tokens": 6525416}
3206
+ {"current_steps": 15935, "total_steps": 16520, "loss": 0.0004, "lr": 3.828026522933292e-06, "epoch": 19.291767554479417, "percentage": 96.46, "elapsed_time": "0:52:33", "remaining_time": "0:01:55", "throughput": 2069.86, "total_tokens": 6527496}
3207
+ {"current_steps": 15940, "total_steps": 16520, "loss": 0.0023, "lr": 3.7630622420214978e-06, "epoch": 19.297820823244553, "percentage": 96.49, "elapsed_time": "0:52:34", "remaining_time": "0:01:54", "throughput": 2069.87, "total_tokens": 6529512}
3208
+ {"current_steps": 15945, "total_steps": 16520, "loss": 0.0008, "lr": 3.6986518513298374e-06, "epoch": 19.303874092009686, "percentage": 96.52, "elapsed_time": "0:52:35", "remaining_time": "0:01:53", "throughput": 2069.92, "total_tokens": 6531688}
3209
+ {"current_steps": 15950, "total_steps": 16520, "loss": 0.0017, "lr": 3.6347954227518574e-06, "epoch": 19.309927360774818, "percentage": 96.55, "elapsed_time": "0:52:36", "remaining_time": "0:01:52", "throughput": 2069.93, "total_tokens": 6533544}
3210
+ {"current_steps": 15955, "total_steps": 16520, "loss": 0.0117, "lr": 3.5714930275629887e-06, "epoch": 19.31598062953995, "percentage": 96.58, "elapsed_time": "0:52:37", "remaining_time": "0:01:51", "throughput": 2069.96, "total_tokens": 6535592}
3211
+ {"current_steps": 15960, "total_steps": 16520, "loss": 0.0009, "lr": 3.508744736420155e-06, "epoch": 19.322033898305083, "percentage": 96.61, "elapsed_time": "0:52:38", "remaining_time": "0:01:50", "throughput": 2070.02, "total_tokens": 6537768}
3212
+ {"current_steps": 15965, "total_steps": 16520, "loss": 0.0003, "lr": 3.4465506193617215e-06, "epoch": 19.32808716707022, "percentage": 96.64, "elapsed_time": "0:52:39", "remaining_time": "0:01:49", "throughput": 2070.05, "total_tokens": 6539720}
3213
+ {"current_steps": 15970, "total_steps": 16520, "loss": 0.0023, "lr": 3.384910745807657e-06, "epoch": 19.33414043583535, "percentage": 96.67, "elapsed_time": "0:52:40", "remaining_time": "0:01:48", "throughput": 2070.07, "total_tokens": 6541608}
3214
+ {"current_steps": 15975, "total_steps": 16520, "loss": 0.0005, "lr": 3.3238251845592037e-06, "epoch": 19.340193704600484, "percentage": 96.7, "elapsed_time": "0:52:41", "remaining_time": "0:01:47", "throughput": 2070.09, "total_tokens": 6543592}
3215
+ {"current_steps": 15980, "total_steps": 16520, "loss": 0.0005, "lr": 3.2632940037988225e-06, "epoch": 19.346246973365616, "percentage": 96.73, "elapsed_time": "0:52:41", "remaining_time": "0:01:46", "throughput": 2070.16, "total_tokens": 6545800}
3216
+ {"current_steps": 15985, "total_steps": 16520, "loss": 0.0005, "lr": 3.2033172710904114e-06, "epoch": 19.352300242130752, "percentage": 96.76, "elapsed_time": "0:52:42", "remaining_time": "0:01:45", "throughput": 2070.2, "total_tokens": 6547912}
3217
+ {"current_steps": 15990, "total_steps": 16520, "loss": 0.0009, "lr": 3.143895053378698e-06, "epoch": 19.358353510895885, "percentage": 96.79, "elapsed_time": "0:52:43", "remaining_time": "0:01:44", "throughput": 2070.28, "total_tokens": 6550088}
3218
+ {"current_steps": 15995, "total_steps": 16520, "loss": 0.0005, "lr": 3.0850274169897386e-06, "epoch": 19.364406779661017, "percentage": 96.82, "elapsed_time": "0:52:44", "remaining_time": "0:01:43", "throughput": 2070.32, "total_tokens": 6552200}
3219
+ {"current_steps": 16000, "total_steps": 16520, "loss": 0.0022, "lr": 3.0267144276304726e-06, "epoch": 19.37046004842615, "percentage": 96.85, "elapsed_time": "0:52:45", "remaining_time": "0:01:42", "throughput": 2070.34, "total_tokens": 6554184}
3220
+ {"current_steps": 16005, "total_steps": 16520, "loss": 0.0004, "lr": 2.9689561503886685e-06, "epoch": 19.376513317191282, "percentage": 96.88, "elapsed_time": "0:52:46", "remaining_time": "0:01:41", "throughput": 2070.36, "total_tokens": 6556200}
3221
+ {"current_steps": 16010, "total_steps": 16520, "loss": 0.0017, "lr": 2.911752649733035e-06, "epoch": 19.38256658595642, "percentage": 96.91, "elapsed_time": "0:52:47", "remaining_time": "0:01:40", "throughput": 2070.4, "total_tokens": 6558312}
3222
+ {"current_steps": 16015, "total_steps": 16520, "loss": 0.0009, "lr": 2.855103989513108e-06, "epoch": 19.38861985472155, "percentage": 96.94, "elapsed_time": "0:52:48", "remaining_time": "0:01:39", "throughput": 2070.47, "total_tokens": 6560456}
3223
+ {"current_steps": 16020, "total_steps": 16520, "loss": 0.0007, "lr": 2.7990102329589206e-06, "epoch": 19.394673123486683, "percentage": 96.97, "elapsed_time": "0:52:49", "remaining_time": "0:01:38", "throughput": 2070.52, "total_tokens": 6562568}
3224
+ {"current_steps": 16025, "total_steps": 16520, "loss": 0.0005, "lr": 2.7434714426813334e-06, "epoch": 19.400726392251816, "percentage": 97.0, "elapsed_time": "0:52:50", "remaining_time": "0:01:37", "throughput": 2070.56, "total_tokens": 6564584}
3225
+ {"current_steps": 16030, "total_steps": 16520, "loss": 0.0005, "lr": 2.688487680671703e-06, "epoch": 19.406779661016948, "percentage": 97.03, "elapsed_time": "0:52:51", "remaining_time": "0:01:36", "throughput": 2070.56, "total_tokens": 6566568}
3226
+ {"current_steps": 16035, "total_steps": 16520, "loss": 0.0015, "lr": 2.6340590083018257e-06, "epoch": 19.412832929782084, "percentage": 97.06, "elapsed_time": "0:52:52", "remaining_time": "0:01:35", "throughput": 2070.58, "total_tokens": 6568456}
3227
+ {"current_steps": 16040, "total_steps": 16520, "loss": 0.0007, "lr": 2.580185486323994e-06, "epoch": 19.418886198547217, "percentage": 97.09, "elapsed_time": "0:52:53", "remaining_time": "0:01:34", "throughput": 2070.61, "total_tokens": 6570440}
3228
+ {"current_steps": 16045, "total_steps": 16520, "loss": 0.002, "lr": 2.5268671748707173e-06, "epoch": 19.42493946731235, "percentage": 97.12, "elapsed_time": "0:52:54", "remaining_time": "0:01:33", "throughput": 2070.6, "total_tokens": 6572392}
3229
+ {"current_steps": 16050, "total_steps": 16520, "loss": 0.0005, "lr": 2.4741041334549463e-06, "epoch": 19.43099273607748, "percentage": 97.15, "elapsed_time": "0:52:55", "remaining_time": "0:01:32", "throughput": 2070.62, "total_tokens": 6574408}
3230
+ {"current_steps": 16055, "total_steps": 16520, "loss": 0.001, "lr": 2.421896420969738e-06, "epoch": 19.437046004842614, "percentage": 97.19, "elapsed_time": "0:52:55", "remaining_time": "0:01:31", "throughput": 2070.63, "total_tokens": 6576264}
3231
+ {"current_steps": 16060, "total_steps": 16520, "loss": 0.0005, "lr": 2.3702440956884232e-06, "epoch": 19.44309927360775, "percentage": 97.22, "elapsed_time": "0:52:56", "remaining_time": "0:01:30", "throughput": 2070.66, "total_tokens": 6578280}
3232
+ {"current_steps": 16065, "total_steps": 16520, "loss": 0.0014, "lr": 2.3191472152642724e-06, "epoch": 19.449152542372882, "percentage": 97.25, "elapsed_time": "0:52:57", "remaining_time": "0:01:30", "throughput": 2070.69, "total_tokens": 6580328}
3233
+ {"current_steps": 16070, "total_steps": 16520, "loss": 0.001, "lr": 2.268605836730664e-06, "epoch": 19.455205811138015, "percentage": 97.28, "elapsed_time": "0:52:58", "remaining_time": "0:01:29", "throughput": 2070.71, "total_tokens": 6582376}
3234
+ {"current_steps": 16075, "total_steps": 16520, "loss": 0.0126, "lr": 2.2186200165009162e-06, "epoch": 19.461259079903147, "percentage": 97.31, "elapsed_time": "0:52:59", "remaining_time": "0:01:28", "throughput": 2070.77, "total_tokens": 6584552}
3235
+ {"current_steps": 16080, "total_steps": 16520, "loss": 0.0013, "lr": 2.1691898103682882e-06, "epoch": 19.46731234866828, "percentage": 97.34, "elapsed_time": "0:53:00", "remaining_time": "0:01:27", "throughput": 2070.79, "total_tokens": 6586504}
3236
+ {"current_steps": 16085, "total_steps": 16520, "loss": 0.0006, "lr": 2.120315273505813e-06, "epoch": 19.473365617433416, "percentage": 97.37, "elapsed_time": "0:53:01", "remaining_time": "0:01:26", "throughput": 2070.85, "total_tokens": 6588648}
3237
+ {"current_steps": 16090, "total_steps": 16520, "loss": 0.0012, "lr": 2.071996460466297e-06, "epoch": 19.479418886198548, "percentage": 97.4, "elapsed_time": "0:53:02", "remaining_time": "0:01:25", "throughput": 2070.89, "total_tokens": 6590760}
3238
+ {"current_steps": 16095, "total_steps": 16520, "loss": 0.0012, "lr": 2.02423342518232e-06, "epoch": 19.48547215496368, "percentage": 97.43, "elapsed_time": "0:53:03", "remaining_time": "0:01:24", "throughput": 2070.96, "total_tokens": 6593000}
3239
+ {"current_steps": 16100, "total_steps": 16520, "loss": 0.0026, "lr": 1.9770262209660717e-06, "epoch": 19.491525423728813, "percentage": 97.46, "elapsed_time": "0:53:04", "remaining_time": "0:01:23", "throughput": 2071.02, "total_tokens": 6595176}
3240
+ {"current_steps": 16105, "total_steps": 16520, "loss": 0.0005, "lr": 1.9303749005092906e-06, "epoch": 19.497578692493946, "percentage": 97.49, "elapsed_time": "0:53:05", "remaining_time": "0:01:22", "throughput": 2071.04, "total_tokens": 6597224}
3241
+ {"current_steps": 16110, "total_steps": 16520, "loss": 0.001, "lr": 1.8842795158833247e-06, "epoch": 19.50363196125908, "percentage": 97.52, "elapsed_time": "0:53:06", "remaining_time": "0:01:21", "throughput": 2071.09, "total_tokens": 6599304}
3242
+ {"current_steps": 16115, "total_steps": 16520, "loss": 0.0027, "lr": 1.8387401185389064e-06, "epoch": 19.509685230024214, "percentage": 97.55, "elapsed_time": "0:53:07", "remaining_time": "0:01:20", "throughput": 2071.01, "total_tokens": 6601288}
3243
+ {"current_steps": 16120, "total_steps": 16520, "loss": 0.0008, "lr": 1.7937567593063198e-06, "epoch": 19.515738498789347, "percentage": 97.58, "elapsed_time": "0:53:08", "remaining_time": "0:01:19", "throughput": 2071.04, "total_tokens": 6603368}
3244
+ {"current_steps": 16125, "total_steps": 16520, "loss": 0.0014, "lr": 1.7493294883951239e-06, "epoch": 19.52179176755448, "percentage": 97.61, "elapsed_time": "0:53:09", "remaining_time": "0:01:18", "throughput": 2071.05, "total_tokens": 6605352}
3245
+ {"current_steps": 16130, "total_steps": 16520, "loss": 0.0007, "lr": 1.7054583553941516e-06, "epoch": 19.52784503631961, "percentage": 97.64, "elapsed_time": "0:53:10", "remaining_time": "0:01:17", "throughput": 2071.09, "total_tokens": 6607240}
3246
+ {"current_steps": 16135, "total_steps": 16520, "loss": 0.0009, "lr": 1.6621434092716214e-06, "epoch": 19.533898305084747, "percentage": 97.67, "elapsed_time": "0:53:11", "remaining_time": "0:01:16", "throughput": 2071.1, "total_tokens": 6609256}
3247
+ {"current_steps": 16140, "total_steps": 16520, "loss": 0.0014, "lr": 1.619384698374693e-06, "epoch": 19.53995157384988, "percentage": 97.7, "elapsed_time": "0:53:12", "remaining_time": "0:01:15", "throughput": 2071.18, "total_tokens": 6611528}
3248
+ {"current_steps": 16145, "total_steps": 16520, "loss": 0.0014, "lr": 1.5771822704299666e-06, "epoch": 19.546004842615012, "percentage": 97.73, "elapsed_time": "0:53:13", "remaining_time": "0:01:14", "throughput": 2071.22, "total_tokens": 6613576}
3249
+ {"current_steps": 16150, "total_steps": 16520, "loss": 0.0008, "lr": 1.5355361725428728e-06, "epoch": 19.552058111380145, "percentage": 97.76, "elapsed_time": "0:53:14", "remaining_time": "0:01:13", "throughput": 2071.27, "total_tokens": 6615656}
3250
+ {"current_steps": 16155, "total_steps": 16520, "loss": 0.0005, "lr": 1.494446451198117e-06, "epoch": 19.558111380145277, "percentage": 97.79, "elapsed_time": "0:53:14", "remaining_time": "0:01:12", "throughput": 2071.29, "total_tokens": 6617640}
3251
+ {"current_steps": 16160, "total_steps": 16520, "loss": 0.0005, "lr": 1.4539131522591786e-06, "epoch": 19.564164648910413, "percentage": 97.82, "elapsed_time": "0:53:15", "remaining_time": "0:01:11", "throughput": 2071.36, "total_tokens": 6619848}
3252
+ {"current_steps": 16165, "total_steps": 16520, "loss": 0.0004, "lr": 1.4139363209685895e-06, "epoch": 19.570217917675546, "percentage": 97.85, "elapsed_time": "0:53:16", "remaining_time": "0:01:10", "throughput": 2071.42, "total_tokens": 6622024}
3253
+ {"current_steps": 16170, "total_steps": 16520, "loss": 0.0006, "lr": 1.3745160019476565e-06, "epoch": 19.576271186440678, "percentage": 97.88, "elapsed_time": "0:53:17", "remaining_time": "0:01:09", "throughput": 2071.44, "total_tokens": 6624072}
3254
+ {"current_steps": 16175, "total_steps": 16520, "loss": 0.0005, "lr": 1.3356522391967385e-06, "epoch": 19.58232445520581, "percentage": 97.91, "elapsed_time": "0:53:18", "remaining_time": "0:01:08", "throughput": 2071.48, "total_tokens": 6626120}
3255
+ {"current_steps": 16180, "total_steps": 16520, "loss": 0.001, "lr": 1.2973450760946914e-06, "epoch": 19.588377723970943, "percentage": 97.94, "elapsed_time": "0:53:19", "remaining_time": "0:01:07", "throughput": 2071.51, "total_tokens": 6628168}
3256
+ {"current_steps": 16185, "total_steps": 16520, "loss": 0.0005, "lr": 1.2595945553992573e-06, "epoch": 19.59443099273608, "percentage": 97.97, "elapsed_time": "0:53:20", "remaining_time": "0:01:06", "throughput": 2071.57, "total_tokens": 6630376}
3257
+ {"current_steps": 16190, "total_steps": 16520, "loss": 0.0009, "lr": 1.2224007192468967e-06, "epoch": 19.60048426150121, "percentage": 98.0, "elapsed_time": "0:53:21", "remaining_time": "0:01:05", "throughput": 2071.62, "total_tokens": 6632520}
3258
+ {"current_steps": 16195, "total_steps": 16520, "loss": 0.0008, "lr": 1.1857636091526791e-06, "epoch": 19.606537530266344, "percentage": 98.03, "elapsed_time": "0:53:22", "remaining_time": "0:01:04", "throughput": 2071.66, "total_tokens": 6634568}
3259
+ {"current_steps": 16200, "total_steps": 16520, "loss": 0.0006, "lr": 1.1496832660101708e-06, "epoch": 19.612590799031477, "percentage": 98.06, "elapsed_time": "0:53:23", "remaining_time": "0:01:03", "throughput": 2071.7, "total_tokens": 6636520}
3260
+ {"current_steps": 16205, "total_steps": 16520, "loss": 0.0007, "lr": 1.1141597300916018e-06, "epoch": 19.61864406779661, "percentage": 98.09, "elapsed_time": "0:53:24", "remaining_time": "0:01:02", "throughput": 2071.7, "total_tokens": 6638408}
3261
+ {"current_steps": 16210, "total_steps": 16520, "loss": 0.0005, "lr": 1.079193041047699e-06, "epoch": 19.624697336561745, "percentage": 98.12, "elapsed_time": "0:53:25", "remaining_time": "0:01:01", "throughput": 2071.76, "total_tokens": 6640616}
3262
+ {"current_steps": 16215, "total_steps": 16520, "loss": 0.0014, "lr": 1.0447832379075761e-06, "epoch": 19.630750605326877, "percentage": 98.15, "elapsed_time": "0:53:26", "remaining_time": "0:01:00", "throughput": 2071.81, "total_tokens": 6642696}
3263
+ {"current_steps": 16220, "total_steps": 16520, "loss": 0.0005, "lr": 1.0109303590787877e-06, "epoch": 19.63680387409201, "percentage": 98.18, "elapsed_time": "0:53:27", "remaining_time": "0:00:59", "throughput": 2071.83, "total_tokens": 6644744}
3264
+ {"current_steps": 16225, "total_steps": 16520, "loss": 0.0007, "lr": 9.77634442347275e-07, "epoch": 19.642857142857142, "percentage": 98.21, "elapsed_time": "0:53:28", "remaining_time": "0:00:58", "throughput": 2071.88, "total_tokens": 6646888}
3265
+ {"current_steps": 16230, "total_steps": 16520, "loss": 0.0031, "lr": 9.44895524877365e-07, "epoch": 19.648910411622275, "percentage": 98.24, "elapsed_time": "0:53:29", "remaining_time": "0:00:57", "throughput": 2071.95, "total_tokens": 6649128}
3266
+ {"current_steps": 16235, "total_steps": 16520, "loss": 0.0013, "lr": 9.12713643211549e-07, "epoch": 19.65496368038741, "percentage": 98.27, "elapsed_time": "0:53:30", "remaining_time": "0:00:56", "throughput": 2072.0, "total_tokens": 6651272}
3267
+ {"current_steps": 16240, "total_steps": 16520, "loss": 0.0007, "lr": 8.810888332707046e-07, "epoch": 19.661016949152543, "percentage": 98.31, "elapsed_time": "0:53:30", "remaining_time": "0:00:55", "throughput": 2072.02, "total_tokens": 6653256}
3268
+ {"current_steps": 16245, "total_steps": 16520, "loss": 0.0008, "lr": 8.500211303537619e-07, "epoch": 19.667070217917676, "percentage": 98.34, "elapsed_time": "0:53:31", "remaining_time": "0:00:54", "throughput": 2072.05, "total_tokens": 6655240}
3269
+ {"current_steps": 16250, "total_steps": 16520, "loss": 0.002, "lr": 8.195105691379268e-07, "epoch": 19.673123486682808, "percentage": 98.37, "elapsed_time": "0:53:32", "remaining_time": "0:00:53", "throughput": 2072.05, "total_tokens": 6657160}
3270
+ {"current_steps": 16255, "total_steps": 16520, "loss": 0.0005, "lr": 7.895571836785131e-07, "epoch": 19.67917675544794, "percentage": 98.4, "elapsed_time": "0:53:33", "remaining_time": "0:00:52", "throughput": 2072.09, "total_tokens": 6659144}
3271
+ {"current_steps": 16260, "total_steps": 16520, "loss": 0.0012, "lr": 7.601610074088883e-07, "epoch": 19.685230024213077, "percentage": 98.43, "elapsed_time": "0:53:34", "remaining_time": "0:00:51", "throughput": 2072.11, "total_tokens": 6661160}
3272
+ {"current_steps": 16265, "total_steps": 16520, "loss": 0.0009, "lr": 7.313220731405835e-07, "epoch": 19.69128329297821, "percentage": 98.46, "elapsed_time": "0:53:35", "remaining_time": "0:00:50", "throughput": 2072.17, "total_tokens": 6663272}
3273
+ {"current_steps": 16270, "total_steps": 16520, "loss": 0.0006, "lr": 7.030404130629608e-07, "epoch": 19.69733656174334, "percentage": 98.49, "elapsed_time": "0:53:36", "remaining_time": "0:00:49", "throughput": 2072.2, "total_tokens": 6665288}
3274
+ {"current_steps": 16275, "total_steps": 16520, "loss": 0.0006, "lr": 6.753160587435469e-07, "epoch": 19.703389830508474, "percentage": 98.52, "elapsed_time": "0:53:37", "remaining_time": "0:00:48", "throughput": 2072.22, "total_tokens": 6667336}
3275
+ {"current_steps": 16280, "total_steps": 16520, "loss": 0.0012, "lr": 6.481490411277546e-07, "epoch": 19.709443099273606, "percentage": 98.55, "elapsed_time": "0:53:38", "remaining_time": "0:00:47", "throughput": 2072.25, "total_tokens": 6669288}
3276
+ {"current_steps": 16285, "total_steps": 16520, "loss": 0.0007, "lr": 6.215393905388278e-07, "epoch": 19.715496368038743, "percentage": 98.58, "elapsed_time": "0:53:39", "remaining_time": "0:00:46", "throughput": 2072.29, "total_tokens": 6671368}
3277
+ {"current_steps": 16290, "total_steps": 16520, "loss": 0.001, "lr": 5.954871366779524e-07, "epoch": 19.721549636803875, "percentage": 98.61, "elapsed_time": "0:53:40", "remaining_time": "0:00:45", "throughput": 2072.33, "total_tokens": 6673448}
3278
+ {"current_steps": 16295, "total_steps": 16520, "loss": 0.0019, "lr": 5.699923086242564e-07, "epoch": 19.727602905569007, "percentage": 98.64, "elapsed_time": "0:53:41", "remaining_time": "0:00:44", "throughput": 2072.35, "total_tokens": 6675432}
3279
+ {"current_steps": 16300, "total_steps": 16520, "loss": 0.0015, "lr": 5.450549348344213e-07, "epoch": 19.73365617433414, "percentage": 98.67, "elapsed_time": "0:53:42", "remaining_time": "0:00:43", "throughput": 2072.39, "total_tokens": 6677384}
3280
+ {"current_steps": 16305, "total_steps": 16520, "loss": 0.0007, "lr": 5.206750431432372e-07, "epoch": 19.739709443099272, "percentage": 98.7, "elapsed_time": "0:53:43", "remaining_time": "0:00:42", "throughput": 2072.4, "total_tokens": 6679400}
3281
+ {"current_steps": 16310, "total_steps": 16520, "loss": 0.0014, "lr": 4.968526607629364e-07, "epoch": 19.74576271186441, "percentage": 98.73, "elapsed_time": "0:53:43", "remaining_time": "0:00:41", "throughput": 2072.44, "total_tokens": 6681512}
3282
+ {"current_steps": 16315, "total_steps": 16520, "loss": 0.0009, "lr": 4.735878142836936e-07, "epoch": 19.75181598062954, "percentage": 98.76, "elapsed_time": "0:53:44", "remaining_time": "0:00:40", "throughput": 2072.48, "total_tokens": 6683624}
3283
+ {"current_steps": 16320, "total_steps": 16520, "loss": 0.0006, "lr": 4.508805296732921e-07, "epoch": 19.757869249394673, "percentage": 98.79, "elapsed_time": "0:53:45", "remaining_time": "0:00:39", "throughput": 2072.51, "total_tokens": 6685672}
3284
+ {"current_steps": 16325, "total_steps": 16520, "loss": 0.0062, "lr": 4.2873083227706887e-07, "epoch": 19.763922518159806, "percentage": 98.82, "elapsed_time": "0:53:46", "remaining_time": "0:00:38", "throughput": 2072.56, "total_tokens": 6687816}
3285
+ {"current_steps": 16330, "total_steps": 16520, "loss": 0.0005, "lr": 4.07138746818192e-07, "epoch": 19.769975786924938, "percentage": 98.85, "elapsed_time": "0:53:47", "remaining_time": "0:00:37", "throughput": 2072.57, "total_tokens": 6689768}
3286
+ {"current_steps": 16335, "total_steps": 16520, "loss": 0.001, "lr": 3.8610429739732724e-07, "epoch": 19.776029055690074, "percentage": 98.88, "elapsed_time": "0:53:48", "remaining_time": "0:00:36", "throughput": 2072.62, "total_tokens": 6691848}
3287
+ {"current_steps": 16340, "total_steps": 16520, "loss": 0.0015, "lr": 3.65627507492694e-07, "epoch": 19.782082324455207, "percentage": 98.91, "elapsed_time": "0:53:49", "remaining_time": "0:00:35", "throughput": 2072.64, "total_tokens": 6693896}
3288
+ {"current_steps": 16345, "total_steps": 16520, "loss": 0.0005, "lr": 3.457083999601207e-07, "epoch": 19.78813559322034, "percentage": 98.94, "elapsed_time": "0:53:50", "remaining_time": "0:00:34", "throughput": 2072.7, "total_tokens": 6696104}
3289
+ {"current_steps": 16350, "total_steps": 16520, "loss": 0.0005, "lr": 3.2634699703293356e-07, "epoch": 19.79418886198547, "percentage": 98.97, "elapsed_time": "0:53:51", "remaining_time": "0:00:33", "throughput": 2072.74, "total_tokens": 6698088}
3290
+ {"current_steps": 16355, "total_steps": 16520, "loss": 0.0006, "lr": 3.075433203219569e-07, "epoch": 19.800242130750604, "percentage": 99.0, "elapsed_time": "0:53:52", "remaining_time": "0:00:32", "throughput": 2072.77, "total_tokens": 6700168}
3291
+ {"current_steps": 16360, "total_steps": 16520, "loss": 0.0012, "lr": 2.8929739081545724e-07, "epoch": 19.80629539951574, "percentage": 99.03, "elapsed_time": "0:53:53", "remaining_time": "0:00:31", "throughput": 2072.81, "total_tokens": 6702280}
3292
+ {"current_steps": 16365, "total_steps": 16520, "loss": 0.0025, "lr": 2.7160922887925486e-07, "epoch": 19.812348668280872, "percentage": 99.06, "elapsed_time": "0:53:54", "remaining_time": "0:00:30", "throughput": 2072.86, "total_tokens": 6704360}
3293
+ {"current_steps": 16370, "total_steps": 16520, "loss": 0.0006, "lr": 2.544788542565013e-07, "epoch": 19.818401937046005, "percentage": 99.09, "elapsed_time": "0:53:55", "remaining_time": "0:00:29", "throughput": 2072.88, "total_tokens": 6706408}
3294
+ {"current_steps": 16375, "total_steps": 16520, "loss": 0.0005, "lr": 2.3790628606779053e-07, "epoch": 19.824455205811137, "percentage": 99.12, "elapsed_time": "0:53:56", "remaining_time": "0:00:28", "throughput": 2072.89, "total_tokens": 6708328}
3295
+ {"current_steps": 16380, "total_steps": 16520, "loss": 0.0003, "lr": 2.2189154281115898e-07, "epoch": 19.83050847457627, "percentage": 99.15, "elapsed_time": "0:53:57", "remaining_time": "0:00:27", "throughput": 2072.9, "total_tokens": 6710312}
3296
+ {"current_steps": 16385, "total_steps": 16520, "loss": 0.0003, "lr": 2.06434642361919e-07, "epoch": 19.836561743341406, "percentage": 99.18, "elapsed_time": "0:53:58", "remaining_time": "0:00:26", "throughput": 2072.92, "total_tokens": 6712360}
3297
+ {"current_steps": 16390, "total_steps": 16520, "loss": 0.0017, "lr": 1.9153560197276986e-07, "epoch": 19.84261501210654, "percentage": 99.21, "elapsed_time": "0:53:59", "remaining_time": "0:00:25", "throughput": 2072.94, "total_tokens": 6714344}
3298
+ {"current_steps": 16395, "total_steps": 16520, "loss": 0.002, "lr": 1.7719443827368674e-07, "epoch": 19.84866828087167, "percentage": 99.24, "elapsed_time": "0:54:00", "remaining_time": "0:00:24", "throughput": 2073.02, "total_tokens": 6716616}
3299
+ {"current_steps": 16400, "total_steps": 16520, "loss": 0.0006, "lr": 1.6341116727214277e-07, "epoch": 19.854721549636803, "percentage": 99.27, "elapsed_time": "0:54:00", "remaining_time": "0:00:23", "throughput": 2073.07, "total_tokens": 6718728}
3300
+ {"current_steps": 16405, "total_steps": 16520, "loss": 0.0004, "lr": 1.5018580435260944e-07, "epoch": 19.860774818401936, "percentage": 99.3, "elapsed_time": "0:54:01", "remaining_time": "0:00:22", "throughput": 2073.09, "total_tokens": 6720712}
3301
+ {"current_steps": 16410, "total_steps": 16520, "loss": 0.0002, "lr": 1.375183642770561e-07, "epoch": 19.86682808716707, "percentage": 99.33, "elapsed_time": "0:54:02", "remaining_time": "0:00:21", "throughput": 2073.1, "total_tokens": 6722632}
3302
+ {"current_steps": 16415, "total_steps": 16520, "loss": 0.0005, "lr": 1.2540886118467264e-07, "epoch": 19.872881355932204, "percentage": 99.36, "elapsed_time": "0:54:03", "remaining_time": "0:00:20", "throughput": 2073.13, "total_tokens": 6724648}
3303
+ {"current_steps": 16420, "total_steps": 16520, "loss": 0.0005, "lr": 1.1385730859181376e-07, "epoch": 19.878934624697337, "percentage": 99.39, "elapsed_time": "0:54:04", "remaining_time": "0:00:19", "throughput": 2073.16, "total_tokens": 6726664}
3304
+ {"current_steps": 16425, "total_steps": 16520, "loss": 0.0006, "lr": 1.0286371939205452e-07, "epoch": 19.88498789346247, "percentage": 99.42, "elapsed_time": "0:54:05", "remaining_time": "0:00:18", "throughput": 2073.18, "total_tokens": 6728552}
3305
+ {"current_steps": 16430, "total_steps": 16520, "loss": 0.0004, "lr": 9.242810585630146e-08, "epoch": 19.8910411622276, "percentage": 99.46, "elapsed_time": "0:54:06", "remaining_time": "0:00:17", "throughput": 2073.18, "total_tokens": 6730536}
3306
+ {"current_steps": 16435, "total_steps": 16520, "loss": 0.0011, "lr": 8.255047963262596e-08, "epoch": 19.897094430992738, "percentage": 99.49, "elapsed_time": "0:54:07", "remaining_time": "0:00:16", "throughput": 2073.21, "total_tokens": 6732552}
3307
+ {"current_steps": 16440, "total_steps": 16520, "loss": 0.0007, "lr": 7.323085174609778e-08, "epoch": 19.90314769975787, "percentage": 99.52, "elapsed_time": "0:54:08", "remaining_time": "0:00:15", "throughput": 2073.24, "total_tokens": 6734536}
3308
+ {"current_steps": 16445, "total_steps": 16520, "loss": 0.0032, "lr": 6.446923259928461e-08, "epoch": 19.909200968523002, "percentage": 99.55, "elapsed_time": "0:54:09", "remaining_time": "0:00:14", "throughput": 2073.29, "total_tokens": 6736680}
3309
+ {"current_steps": 16450, "total_steps": 16520, "loss": 0.0014, "lr": 5.626563197158596e-08, "epoch": 19.915254237288135, "percentage": 99.58, "elapsed_time": "0:54:10", "remaining_time": "0:00:13", "throughput": 2073.31, "total_tokens": 6738728}
3310
+ {"current_steps": 16455, "total_steps": 16520, "loss": 0.0006, "lr": 4.8620059019788275e-08, "epoch": 19.921307506053267, "percentage": 99.61, "elapsed_time": "0:54:11", "remaining_time": "0:00:12", "throughput": 2073.32, "total_tokens": 6740712}
3311
+ {"current_steps": 16460, "total_steps": 16520, "loss": 0.0013, "lr": 4.153252227773185e-08, "epoch": 19.927360774818403, "percentage": 99.64, "elapsed_time": "0:54:12", "remaining_time": "0:00:11", "throughput": 2073.35, "total_tokens": 6742664}
3312
+ {"current_steps": 16465, "total_steps": 16520, "loss": 0.0006, "lr": 3.500302965636637e-08, "epoch": 19.933414043583536, "percentage": 99.67, "elapsed_time": "0:54:12", "remaining_time": "0:00:10", "throughput": 2073.39, "total_tokens": 6744744}
3313
+ {"current_steps": 16470, "total_steps": 16520, "loss": 0.0102, "lr": 2.9031588443750866e-08, "epoch": 19.93946731234867, "percentage": 99.7, "elapsed_time": "0:54:13", "remaining_time": "0:00:09", "throughput": 2073.42, "total_tokens": 6746728}
3314
+ {"current_steps": 16475, "total_steps": 16520, "loss": 0.0014, "lr": 2.3618205305220298e-08, "epoch": 19.9455205811138, "percentage": 99.73, "elapsed_time": "0:54:14", "remaining_time": "0:00:08", "throughput": 2073.44, "total_tokens": 6748776}
3315
+ {"current_steps": 16480, "total_steps": 16520, "loss": 0.0005, "lr": 1.8762886282941427e-08, "epoch": 19.951573849878933, "percentage": 99.76, "elapsed_time": "0:54:15", "remaining_time": "0:00:07", "throughput": 2073.48, "total_tokens": 6750888}
3316
+ {"current_steps": 16485, "total_steps": 16520, "loss": 0.0008, "lr": 1.446563679641244e-08, "epoch": 19.95762711864407, "percentage": 99.79, "elapsed_time": "0:54:16", "remaining_time": "0:00:06", "throughput": 2073.52, "total_tokens": 6752936}
3317
+ {"current_steps": 16490, "total_steps": 16520, "loss": 0.0027, "lr": 1.072646164212987e-08, "epoch": 19.9636803874092, "percentage": 99.82, "elapsed_time": "0:54:17", "remaining_time": "0:00:05", "throughput": 2073.56, "total_tokens": 6754984}
3318
+ {"current_steps": 16495, "total_steps": 16520, "loss": 0.0014, "lr": 7.545364993699621e-09, "epoch": 19.969733656174334, "percentage": 99.85, "elapsed_time": "0:54:18", "remaining_time": "0:00:04", "throughput": 2073.61, "total_tokens": 6757128}
3319
+ {"current_steps": 16500, "total_steps": 16520, "loss": 0.0005, "lr": 4.922350401781461e-09, "epoch": 19.975786924939467, "percentage": 99.88, "elapsed_time": "0:54:19", "remaining_time": "0:00:03", "throughput": 2073.62, "total_tokens": 6759080}
3320
+ {"current_steps": 16505, "total_steps": 16520, "loss": 0.008, "lr": 2.8574207941445275e-09, "epoch": 19.9818401937046, "percentage": 99.91, "elapsed_time": "0:54:20", "remaining_time": "0:00:02", "throughput": 2073.66, "total_tokens": 6761128}
3321
+ {"current_steps": 16510, "total_steps": 16520, "loss": 0.0013, "lr": 1.350578475611819e-09, "epoch": 19.987893462469735, "percentage": 99.94, "elapsed_time": "0:54:21", "remaining_time": "0:00:01", "throughput": 2073.7, "total_tokens": 6763240}
3322
+ {"current_steps": 16515, "total_steps": 16520, "loss": 0.0016, "lr": 4.0182512806019587e-10, "epoch": 19.993946731234868, "percentage": 99.97, "elapsed_time": "0:54:22", "remaining_time": "0:00:00", "throughput": 2073.72, "total_tokens": 6765288}
3323
+ {"current_steps": 16520, "total_steps": 16520, "loss": 0.0005, "lr": 1.116181058691268e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:23", "remaining_time": "0:00:00", "throughput": 2073.71, "total_tokens": 6767120}
3324
+ {"current_steps": 16520, "total_steps": 16520, "eval_loss": 0.3815227448940277, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:31", "remaining_time": "0:00:00", "throughput": 2068.77, "total_tokens": 6767120}
3325
+ {"current_steps": 16520, "total_steps": 16520, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:32", "remaining_time": "0:00:00", "throughput": 2068.13, "total_tokens": 6767120}