Training in progress, step 16520
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +165 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e653316ed51c5d355ef66e5613c89d11c1e8b5731b7db406a01e8fb3f0e4c9a0
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -3158,3 +3158,168 @@
|
|
| 3158 |
{"current_steps": 15695, "total_steps": 16520, "loss": 0.0011, "lr": 7.59612349389599e-06, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:51:48", "remaining_time": "0:02:43", "throughput": 2068.41, "total_tokens": 6429832}
|
| 3159 |
{"current_steps": 15700, "total_steps": 16520, "loss": 0.0008, "lr": 7.50466912679143e-06, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:51:49", "remaining_time": "0:02:42", "throughput": 2068.41, "total_tokens": 6431848}
|
| 3160 |
{"current_steps": 15705, "total_steps": 16520, "loss": 0.0019, "lr": 7.413764473596596e-06, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:51:50", "remaining_time": "0:02:41", "throughput": 2068.45, "total_tokens": 6433960}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3158 |
{"current_steps": 15695, "total_steps": 16520, "loss": 0.0011, "lr": 7.59612349389599e-06, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:51:48", "remaining_time": "0:02:43", "throughput": 2068.41, "total_tokens": 6429832}
|
| 3159 |
{"current_steps": 15700, "total_steps": 16520, "loss": 0.0008, "lr": 7.50466912679143e-06, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:51:49", "remaining_time": "0:02:42", "throughput": 2068.41, "total_tokens": 6431848}
|
| 3160 |
{"current_steps": 15705, "total_steps": 16520, "loss": 0.0019, "lr": 7.413764473596596e-06, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:51:50", "remaining_time": "0:02:41", "throughput": 2068.45, "total_tokens": 6433960}
|
| 3161 |
+
{"current_steps": 15710, "total_steps": 16520, "loss": 0.0011, "lr": 7.3234096357775444e-06, "epoch": 19.019370460048425, "percentage": 95.1, "elapsed_time": "0:51:51", "remaining_time": "0:02:40", "throughput": 2068.47, "total_tokens": 6435944}
|
| 3162 |
+
{"current_steps": 15715, "total_steps": 16520, "loss": 0.0004, "lr": 7.233604714186659e-06, "epoch": 19.02542372881356, "percentage": 95.13, "elapsed_time": "0:51:52", "remaining_time": "0:02:39", "throughput": 2068.5, "total_tokens": 6437992}
|
| 3163 |
+
{"current_steps": 15720, "total_steps": 16520, "loss": 0.0005, "lr": 7.144349809062422e-06, "epoch": 19.031476997578693, "percentage": 95.16, "elapsed_time": "0:51:53", "remaining_time": "0:02:38", "throughput": 2068.54, "total_tokens": 6440104}
|
| 3164 |
+
{"current_steps": 15725, "total_steps": 16520, "loss": 0.0004, "lr": 7.055645020029533e-06, "epoch": 19.037530266343826, "percentage": 95.19, "elapsed_time": "0:51:54", "remaining_time": "0:02:37", "throughput": 2068.56, "total_tokens": 6442152}
|
| 3165 |
+
{"current_steps": 15730, "total_steps": 16520, "loss": 0.0013, "lr": 6.967490446098457e-06, "epoch": 19.043583535108958, "percentage": 95.22, "elapsed_time": "0:51:55", "remaining_time": "0:02:36", "throughput": 2068.6, "total_tokens": 6444200}
|
| 3166 |
+
{"current_steps": 15735, "total_steps": 16520, "loss": 0.0008, "lr": 6.879886185665818e-06, "epoch": 19.04963680387409, "percentage": 95.25, "elapsed_time": "0:51:56", "remaining_time": "0:02:35", "throughput": 2068.64, "total_tokens": 6446216}
|
| 3167 |
+
{"current_steps": 15740, "total_steps": 16520, "loss": 0.0006, "lr": 6.792832336513732e-06, "epoch": 19.055690072639226, "percentage": 95.28, "elapsed_time": "0:51:57", "remaining_time": "0:02:34", "throughput": 2068.7, "total_tokens": 6448360}
|
| 3168 |
+
{"current_steps": 15745, "total_steps": 16520, "loss": 0.0005, "lr": 6.7063289958099735e-06, "epoch": 19.06174334140436, "percentage": 95.31, "elapsed_time": "0:51:58", "remaining_time": "0:02:33", "throughput": 2068.74, "total_tokens": 6450440}
|
| 3169 |
+
{"current_steps": 15750, "total_steps": 16520, "loss": 0.002, "lr": 6.620376260108141e-06, "epoch": 19.06779661016949, "percentage": 95.34, "elapsed_time": "0:51:59", "remaining_time": "0:02:32", "throughput": 2068.79, "total_tokens": 6452552}
|
| 3170 |
+
{"current_steps": 15755, "total_steps": 16520, "loss": 0.0005, "lr": 6.534974225346824e-06, "epoch": 19.073849878934624, "percentage": 95.37, "elapsed_time": "0:51:59", "remaining_time": "0:02:31", "throughput": 2068.8, "total_tokens": 6454568}
|
| 3171 |
+
{"current_steps": 15760, "total_steps": 16520, "loss": 0.0014, "lr": 6.450122986850216e-06, "epoch": 19.079903147699756, "percentage": 95.4, "elapsed_time": "0:52:00", "remaining_time": "0:02:30", "throughput": 2068.81, "total_tokens": 6456552}
|
| 3172 |
+
{"current_steps": 15765, "total_steps": 16520, "loss": 0.0007, "lr": 6.365822639327723e-06, "epoch": 19.085956416464892, "percentage": 95.43, "elapsed_time": "0:52:01", "remaining_time": "0:02:29", "throughput": 2068.83, "total_tokens": 6458600}
|
| 3173 |
+
{"current_steps": 15770, "total_steps": 16520, "loss": 0.0009, "lr": 6.2820732768736895e-06, "epoch": 19.092009685230025, "percentage": 95.46, "elapsed_time": "0:52:02", "remaining_time": "0:02:28", "throughput": 2068.83, "total_tokens": 6460552}
|
| 3174 |
+
{"current_steps": 15775, "total_steps": 16520, "loss": 0.0351, "lr": 6.198874992967673e-06, "epoch": 19.098062953995157, "percentage": 95.49, "elapsed_time": "0:52:03", "remaining_time": "0:02:27", "throughput": 2068.86, "total_tokens": 6462600}
|
| 3175 |
+
{"current_steps": 15780, "total_steps": 16520, "loss": 0.0007, "lr": 6.11622788047389e-06, "epoch": 19.10411622276029, "percentage": 95.52, "elapsed_time": "0:52:04", "remaining_time": "0:02:26", "throughput": 2068.88, "total_tokens": 6464520}
|
| 3176 |
+
{"current_steps": 15785, "total_steps": 16520, "loss": 0.0006, "lr": 6.034132031641548e-06, "epoch": 19.110169491525422, "percentage": 95.55, "elapsed_time": "0:52:05", "remaining_time": "0:02:25", "throughput": 2068.91, "total_tokens": 6466440}
|
| 3177 |
+
{"current_steps": 15790, "total_steps": 16520, "loss": 0.0012, "lr": 5.9525875381044035e-06, "epoch": 19.116222760290558, "percentage": 95.58, "elapsed_time": "0:52:06", "remaining_time": "0:02:24", "throughput": 2068.91, "total_tokens": 6468424}
|
| 3178 |
+
{"current_steps": 15795, "total_steps": 16520, "loss": 0.001, "lr": 5.871594490880927e-06, "epoch": 19.12227602905569, "percentage": 95.61, "elapsed_time": "0:52:07", "remaining_time": "0:02:23", "throughput": 2068.96, "total_tokens": 6470472}
|
| 3179 |
+
{"current_steps": 15800, "total_steps": 16520, "loss": 0.0018, "lr": 5.791152980374082e-06, "epoch": 19.128329297820823, "percentage": 95.64, "elapsed_time": "0:52:08", "remaining_time": "0:02:22", "throughput": 2069.0, "total_tokens": 6472456}
|
| 3180 |
+
{"current_steps": 15805, "total_steps": 16520, "loss": 0.0005, "lr": 5.711263096370989e-06, "epoch": 19.134382566585955, "percentage": 95.67, "elapsed_time": "0:52:09", "remaining_time": "0:02:21", "throughput": 2069.01, "total_tokens": 6474344}
|
| 3181 |
+
{"current_steps": 15810, "total_steps": 16520, "loss": 0.0008, "lr": 5.631924928043319e-06, "epoch": 19.140435835351088, "percentage": 95.7, "elapsed_time": "0:52:10", "remaining_time": "0:02:20", "throughput": 2069.03, "total_tokens": 6476168}
|
| 3182 |
+
{"current_steps": 15815, "total_steps": 16520, "loss": 0.0008, "lr": 5.553138563946847e-06, "epoch": 19.146489104116224, "percentage": 95.73, "elapsed_time": "0:52:11", "remaining_time": "0:02:19", "throughput": 2069.08, "total_tokens": 6478312}
|
| 3183 |
+
{"current_steps": 15820, "total_steps": 16520, "loss": 0.0008, "lr": 5.4749040920212824e-06, "epoch": 19.152542372881356, "percentage": 95.76, "elapsed_time": "0:52:11", "remaining_time": "0:02:18", "throughput": 2069.12, "total_tokens": 6480360}
|
| 3184 |
+
{"current_steps": 15825, "total_steps": 16520, "loss": 0.0007, "lr": 5.397221599590607e-06, "epoch": 19.15859564164649, "percentage": 95.79, "elapsed_time": "0:52:12", "remaining_time": "0:02:17", "throughput": 2069.18, "total_tokens": 6482536}
|
| 3185 |
+
{"current_steps": 15830, "total_steps": 16520, "loss": 0.0008, "lr": 5.3200911733624625e-06, "epoch": 19.16464891041162, "percentage": 95.82, "elapsed_time": "0:52:13", "remaining_time": "0:02:16", "throughput": 2069.22, "total_tokens": 6484584}
|
| 3186 |
+
{"current_steps": 15835, "total_steps": 16520, "loss": 0.0009, "lr": 5.243512899428315e-06, "epoch": 19.170702179176754, "percentage": 95.85, "elapsed_time": "0:52:14", "remaining_time": "0:02:15", "throughput": 2069.3, "total_tokens": 6486888}
|
| 3187 |
+
{"current_steps": 15840, "total_steps": 16520, "loss": 0.0008, "lr": 5.167486863263515e-06, "epoch": 19.17675544794189, "percentage": 95.88, "elapsed_time": "0:52:15", "remaining_time": "0:02:14", "throughput": 2069.3, "total_tokens": 6488840}
|
| 3188 |
+
{"current_steps": 15845, "total_steps": 16520, "loss": 0.0009, "lr": 5.092013149726737e-06, "epoch": 19.182808716707022, "percentage": 95.91, "elapsed_time": "0:52:16", "remaining_time": "0:02:13", "throughput": 2069.31, "total_tokens": 6490696}
|
| 3189 |
+
{"current_steps": 15850, "total_steps": 16520, "loss": 0.0004, "lr": 5.017091843060317e-06, "epoch": 19.188861985472155, "percentage": 95.94, "elapsed_time": "0:52:17", "remaining_time": "0:02:12", "throughput": 2069.35, "total_tokens": 6492808}
|
| 3190 |
+
{"current_steps": 15855, "total_steps": 16520, "loss": 0.0013, "lr": 4.942723026890139e-06, "epoch": 19.194915254237287, "percentage": 95.97, "elapsed_time": "0:52:18", "remaining_time": "0:02:11", "throughput": 2069.38, "total_tokens": 6494760}
|
| 3191 |
+
{"current_steps": 15860, "total_steps": 16520, "loss": 0.0007, "lr": 4.86890678422508e-06, "epoch": 19.20096852300242, "percentage": 96.0, "elapsed_time": "0:52:19", "remaining_time": "0:02:10", "throughput": 2069.42, "total_tokens": 6496840}
|
| 3192 |
+
{"current_steps": 15865, "total_steps": 16520, "loss": 0.0006, "lr": 4.795643197457511e-06, "epoch": 19.207021791767556, "percentage": 96.04, "elapsed_time": "0:52:20", "remaining_time": "0:02:09", "throughput": 2069.47, "total_tokens": 6498920}
|
| 3193 |
+
{"current_steps": 15870, "total_steps": 16520, "loss": 0.0008, "lr": 4.722932348362851e-06, "epoch": 19.213075060532688, "percentage": 96.07, "elapsed_time": "0:52:21", "remaining_time": "0:02:08", "throughput": 2069.49, "total_tokens": 6500968}
|
| 3194 |
+
{"current_steps": 15875, "total_steps": 16520, "loss": 0.0005, "lr": 4.650774318099571e-06, "epoch": 19.21912832929782, "percentage": 96.1, "elapsed_time": "0:52:22", "remaining_time": "0:02:07", "throughput": 2069.55, "total_tokens": 6503144}
|
| 3195 |
+
{"current_steps": 15880, "total_steps": 16520, "loss": 0.001, "lr": 4.57916918720902e-06, "epoch": 19.225181598062953, "percentage": 96.13, "elapsed_time": "0:52:23", "remaining_time": "0:02:06", "throughput": 2069.6, "total_tokens": 6505224}
|
| 3196 |
+
{"current_steps": 15885, "total_steps": 16520, "loss": 0.0014, "lr": 4.5081170356156e-06, "epoch": 19.231234866828085, "percentage": 96.16, "elapsed_time": "0:52:24", "remaining_time": "0:02:05", "throughput": 2069.62, "total_tokens": 6507176}
|
| 3197 |
+
{"current_steps": 15890, "total_steps": 16520, "loss": 0.0014, "lr": 4.4376179426262595e-06, "epoch": 19.23728813559322, "percentage": 96.19, "elapsed_time": "0:52:25", "remaining_time": "0:02:04", "throughput": 2069.65, "total_tokens": 6509256}
|
| 3198 |
+
{"current_steps": 15895, "total_steps": 16520, "loss": 0.002, "lr": 4.367671986930777e-06, "epoch": 19.243341404358354, "percentage": 96.22, "elapsed_time": "0:52:26", "remaining_time": "0:02:03", "throughput": 2069.59, "total_tokens": 6511144}
|
| 3199 |
+
{"current_steps": 15900, "total_steps": 16520, "loss": 0.0027, "lr": 4.298279246601533e-06, "epoch": 19.249394673123486, "percentage": 96.25, "elapsed_time": "0:52:27", "remaining_time": "0:02:02", "throughput": 2069.62, "total_tokens": 6513128}
|
| 3200 |
+
{"current_steps": 15905, "total_steps": 16520, "loss": 0.0019, "lr": 4.229439799093293e-06, "epoch": 19.25544794188862, "percentage": 96.28, "elapsed_time": "0:52:27", "remaining_time": "0:02:01", "throughput": 2069.63, "total_tokens": 6515080}
|
| 3201 |
+
{"current_steps": 15910, "total_steps": 16520, "loss": 0.0009, "lr": 4.161153721243482e-06, "epoch": 19.26150121065375, "percentage": 96.31, "elapsed_time": "0:52:28", "remaining_time": "0:02:00", "throughput": 2069.68, "total_tokens": 6517192}
|
| 3202 |
+
{"current_steps": 15915, "total_steps": 16520, "loss": 0.0006, "lr": 4.093421089271576e-06, "epoch": 19.267554479418887, "percentage": 96.34, "elapsed_time": "0:52:29", "remaining_time": "0:01:59", "throughput": 2069.72, "total_tokens": 6519240}
|
| 3203 |
+
{"current_steps": 15920, "total_steps": 16520, "loss": 0.0008, "lr": 4.026241978779599e-06, "epoch": 19.27360774818402, "percentage": 96.37, "elapsed_time": "0:52:30", "remaining_time": "0:01:58", "throughput": 2069.74, "total_tokens": 6521256}
|
| 3204 |
+
{"current_steps": 15925, "total_steps": 16520, "loss": 0.0004, "lr": 3.9596164647514035e-06, "epoch": 19.279661016949152, "percentage": 96.4, "elapsed_time": "0:52:31", "remaining_time": "0:01:57", "throughput": 2069.78, "total_tokens": 6523400}
|
| 3205 |
+
{"current_steps": 15930, "total_steps": 16520, "loss": 0.0016, "lr": 3.89354462155328e-06, "epoch": 19.285714285714285, "percentage": 96.43, "elapsed_time": "0:52:32", "remaining_time": "0:01:56", "throughput": 2069.81, "total_tokens": 6525416}
|
| 3206 |
+
{"current_steps": 15935, "total_steps": 16520, "loss": 0.0004, "lr": 3.828026522933292e-06, "epoch": 19.291767554479417, "percentage": 96.46, "elapsed_time": "0:52:33", "remaining_time": "0:01:55", "throughput": 2069.86, "total_tokens": 6527496}
|
| 3207 |
+
{"current_steps": 15940, "total_steps": 16520, "loss": 0.0023, "lr": 3.7630622420214978e-06, "epoch": 19.297820823244553, "percentage": 96.49, "elapsed_time": "0:52:34", "remaining_time": "0:01:54", "throughput": 2069.87, "total_tokens": 6529512}
|
| 3208 |
+
{"current_steps": 15945, "total_steps": 16520, "loss": 0.0008, "lr": 3.6986518513298374e-06, "epoch": 19.303874092009686, "percentage": 96.52, "elapsed_time": "0:52:35", "remaining_time": "0:01:53", "throughput": 2069.92, "total_tokens": 6531688}
|
| 3209 |
+
{"current_steps": 15950, "total_steps": 16520, "loss": 0.0017, "lr": 3.6347954227518574e-06, "epoch": 19.309927360774818, "percentage": 96.55, "elapsed_time": "0:52:36", "remaining_time": "0:01:52", "throughput": 2069.93, "total_tokens": 6533544}
|
| 3210 |
+
{"current_steps": 15955, "total_steps": 16520, "loss": 0.0117, "lr": 3.5714930275629887e-06, "epoch": 19.31598062953995, "percentage": 96.58, "elapsed_time": "0:52:37", "remaining_time": "0:01:51", "throughput": 2069.96, "total_tokens": 6535592}
|
| 3211 |
+
{"current_steps": 15960, "total_steps": 16520, "loss": 0.0009, "lr": 3.508744736420155e-06, "epoch": 19.322033898305083, "percentage": 96.61, "elapsed_time": "0:52:38", "remaining_time": "0:01:50", "throughput": 2070.02, "total_tokens": 6537768}
|
| 3212 |
+
{"current_steps": 15965, "total_steps": 16520, "loss": 0.0003, "lr": 3.4465506193617215e-06, "epoch": 19.32808716707022, "percentage": 96.64, "elapsed_time": "0:52:39", "remaining_time": "0:01:49", "throughput": 2070.05, "total_tokens": 6539720}
|
| 3213 |
+
{"current_steps": 15970, "total_steps": 16520, "loss": 0.0023, "lr": 3.384910745807657e-06, "epoch": 19.33414043583535, "percentage": 96.67, "elapsed_time": "0:52:40", "remaining_time": "0:01:48", "throughput": 2070.07, "total_tokens": 6541608}
|
| 3214 |
+
{"current_steps": 15975, "total_steps": 16520, "loss": 0.0005, "lr": 3.3238251845592037e-06, "epoch": 19.340193704600484, "percentage": 96.7, "elapsed_time": "0:52:41", "remaining_time": "0:01:47", "throughput": 2070.09, "total_tokens": 6543592}
|
| 3215 |
+
{"current_steps": 15980, "total_steps": 16520, "loss": 0.0005, "lr": 3.2632940037988225e-06, "epoch": 19.346246973365616, "percentage": 96.73, "elapsed_time": "0:52:41", "remaining_time": "0:01:46", "throughput": 2070.16, "total_tokens": 6545800}
|
| 3216 |
+
{"current_steps": 15985, "total_steps": 16520, "loss": 0.0005, "lr": 3.2033172710904114e-06, "epoch": 19.352300242130752, "percentage": 96.76, "elapsed_time": "0:52:42", "remaining_time": "0:01:45", "throughput": 2070.2, "total_tokens": 6547912}
|
| 3217 |
+
{"current_steps": 15990, "total_steps": 16520, "loss": 0.0009, "lr": 3.143895053378698e-06, "epoch": 19.358353510895885, "percentage": 96.79, "elapsed_time": "0:52:43", "remaining_time": "0:01:44", "throughput": 2070.28, "total_tokens": 6550088}
|
| 3218 |
+
{"current_steps": 15995, "total_steps": 16520, "loss": 0.0005, "lr": 3.0850274169897386e-06, "epoch": 19.364406779661017, "percentage": 96.82, "elapsed_time": "0:52:44", "remaining_time": "0:01:43", "throughput": 2070.32, "total_tokens": 6552200}
|
| 3219 |
+
{"current_steps": 16000, "total_steps": 16520, "loss": 0.0022, "lr": 3.0267144276304726e-06, "epoch": 19.37046004842615, "percentage": 96.85, "elapsed_time": "0:52:45", "remaining_time": "0:01:42", "throughput": 2070.34, "total_tokens": 6554184}
|
| 3220 |
+
{"current_steps": 16005, "total_steps": 16520, "loss": 0.0004, "lr": 2.9689561503886685e-06, "epoch": 19.376513317191282, "percentage": 96.88, "elapsed_time": "0:52:46", "remaining_time": "0:01:41", "throughput": 2070.36, "total_tokens": 6556200}
|
| 3221 |
+
{"current_steps": 16010, "total_steps": 16520, "loss": 0.0017, "lr": 2.911752649733035e-06, "epoch": 19.38256658595642, "percentage": 96.91, "elapsed_time": "0:52:47", "remaining_time": "0:01:40", "throughput": 2070.4, "total_tokens": 6558312}
|
| 3222 |
+
{"current_steps": 16015, "total_steps": 16520, "loss": 0.0009, "lr": 2.855103989513108e-06, "epoch": 19.38861985472155, "percentage": 96.94, "elapsed_time": "0:52:48", "remaining_time": "0:01:39", "throughput": 2070.47, "total_tokens": 6560456}
|
| 3223 |
+
{"current_steps": 16020, "total_steps": 16520, "loss": 0.0007, "lr": 2.7990102329589206e-06, "epoch": 19.394673123486683, "percentage": 96.97, "elapsed_time": "0:52:49", "remaining_time": "0:01:38", "throughput": 2070.52, "total_tokens": 6562568}
|
| 3224 |
+
{"current_steps": 16025, "total_steps": 16520, "loss": 0.0005, "lr": 2.7434714426813334e-06, "epoch": 19.400726392251816, "percentage": 97.0, "elapsed_time": "0:52:50", "remaining_time": "0:01:37", "throughput": 2070.56, "total_tokens": 6564584}
|
| 3225 |
+
{"current_steps": 16030, "total_steps": 16520, "loss": 0.0005, "lr": 2.688487680671703e-06, "epoch": 19.406779661016948, "percentage": 97.03, "elapsed_time": "0:52:51", "remaining_time": "0:01:36", "throughput": 2070.56, "total_tokens": 6566568}
|
| 3226 |
+
{"current_steps": 16035, "total_steps": 16520, "loss": 0.0015, "lr": 2.6340590083018257e-06, "epoch": 19.412832929782084, "percentage": 97.06, "elapsed_time": "0:52:52", "remaining_time": "0:01:35", "throughput": 2070.58, "total_tokens": 6568456}
|
| 3227 |
+
{"current_steps": 16040, "total_steps": 16520, "loss": 0.0007, "lr": 2.580185486323994e-06, "epoch": 19.418886198547217, "percentage": 97.09, "elapsed_time": "0:52:53", "remaining_time": "0:01:34", "throughput": 2070.61, "total_tokens": 6570440}
|
| 3228 |
+
{"current_steps": 16045, "total_steps": 16520, "loss": 0.002, "lr": 2.5268671748707173e-06, "epoch": 19.42493946731235, "percentage": 97.12, "elapsed_time": "0:52:54", "remaining_time": "0:01:33", "throughput": 2070.6, "total_tokens": 6572392}
|
| 3229 |
+
{"current_steps": 16050, "total_steps": 16520, "loss": 0.0005, "lr": 2.4741041334549463e-06, "epoch": 19.43099273607748, "percentage": 97.15, "elapsed_time": "0:52:55", "remaining_time": "0:01:32", "throughput": 2070.62, "total_tokens": 6574408}
|
| 3230 |
+
{"current_steps": 16055, "total_steps": 16520, "loss": 0.001, "lr": 2.421896420969738e-06, "epoch": 19.437046004842614, "percentage": 97.19, "elapsed_time": "0:52:55", "remaining_time": "0:01:31", "throughput": 2070.63, "total_tokens": 6576264}
|
| 3231 |
+
{"current_steps": 16060, "total_steps": 16520, "loss": 0.0005, "lr": 2.3702440956884232e-06, "epoch": 19.44309927360775, "percentage": 97.22, "elapsed_time": "0:52:56", "remaining_time": "0:01:30", "throughput": 2070.66, "total_tokens": 6578280}
|
| 3232 |
+
{"current_steps": 16065, "total_steps": 16520, "loss": 0.0014, "lr": 2.3191472152642724e-06, "epoch": 19.449152542372882, "percentage": 97.25, "elapsed_time": "0:52:57", "remaining_time": "0:01:30", "throughput": 2070.69, "total_tokens": 6580328}
|
| 3233 |
+
{"current_steps": 16070, "total_steps": 16520, "loss": 0.001, "lr": 2.268605836730664e-06, "epoch": 19.455205811138015, "percentage": 97.28, "elapsed_time": "0:52:58", "remaining_time": "0:01:29", "throughput": 2070.71, "total_tokens": 6582376}
|
| 3234 |
+
{"current_steps": 16075, "total_steps": 16520, "loss": 0.0126, "lr": 2.2186200165009162e-06, "epoch": 19.461259079903147, "percentage": 97.31, "elapsed_time": "0:52:59", "remaining_time": "0:01:28", "throughput": 2070.77, "total_tokens": 6584552}
|
| 3235 |
+
{"current_steps": 16080, "total_steps": 16520, "loss": 0.0013, "lr": 2.1691898103682882e-06, "epoch": 19.46731234866828, "percentage": 97.34, "elapsed_time": "0:53:00", "remaining_time": "0:01:27", "throughput": 2070.79, "total_tokens": 6586504}
|
| 3236 |
+
{"current_steps": 16085, "total_steps": 16520, "loss": 0.0006, "lr": 2.120315273505813e-06, "epoch": 19.473365617433416, "percentage": 97.37, "elapsed_time": "0:53:01", "remaining_time": "0:01:26", "throughput": 2070.85, "total_tokens": 6588648}
|
| 3237 |
+
{"current_steps": 16090, "total_steps": 16520, "loss": 0.0012, "lr": 2.071996460466297e-06, "epoch": 19.479418886198548, "percentage": 97.4, "elapsed_time": "0:53:02", "remaining_time": "0:01:25", "throughput": 2070.89, "total_tokens": 6590760}
|
| 3238 |
+
{"current_steps": 16095, "total_steps": 16520, "loss": 0.0012, "lr": 2.02423342518232e-06, "epoch": 19.48547215496368, "percentage": 97.43, "elapsed_time": "0:53:03", "remaining_time": "0:01:24", "throughput": 2070.96, "total_tokens": 6593000}
|
| 3239 |
+
{"current_steps": 16100, "total_steps": 16520, "loss": 0.0026, "lr": 1.9770262209660717e-06, "epoch": 19.491525423728813, "percentage": 97.46, "elapsed_time": "0:53:04", "remaining_time": "0:01:23", "throughput": 2071.02, "total_tokens": 6595176}
|
| 3240 |
+
{"current_steps": 16105, "total_steps": 16520, "loss": 0.0005, "lr": 1.9303749005092906e-06, "epoch": 19.497578692493946, "percentage": 97.49, "elapsed_time": "0:53:05", "remaining_time": "0:01:22", "throughput": 2071.04, "total_tokens": 6597224}
|
| 3241 |
+
{"current_steps": 16110, "total_steps": 16520, "loss": 0.001, "lr": 1.8842795158833247e-06, "epoch": 19.50363196125908, "percentage": 97.52, "elapsed_time": "0:53:06", "remaining_time": "0:01:21", "throughput": 2071.09, "total_tokens": 6599304}
|
| 3242 |
+
{"current_steps": 16115, "total_steps": 16520, "loss": 0.0027, "lr": 1.8387401185389064e-06, "epoch": 19.509685230024214, "percentage": 97.55, "elapsed_time": "0:53:07", "remaining_time": "0:01:20", "throughput": 2071.01, "total_tokens": 6601288}
|
| 3243 |
+
{"current_steps": 16120, "total_steps": 16520, "loss": 0.0008, "lr": 1.7937567593063198e-06, "epoch": 19.515738498789347, "percentage": 97.58, "elapsed_time": "0:53:08", "remaining_time": "0:01:19", "throughput": 2071.04, "total_tokens": 6603368}
|
| 3244 |
+
{"current_steps": 16125, "total_steps": 16520, "loss": 0.0014, "lr": 1.7493294883951239e-06, "epoch": 19.52179176755448, "percentage": 97.61, "elapsed_time": "0:53:09", "remaining_time": "0:01:18", "throughput": 2071.05, "total_tokens": 6605352}
|
| 3245 |
+
{"current_steps": 16130, "total_steps": 16520, "loss": 0.0007, "lr": 1.7054583553941516e-06, "epoch": 19.52784503631961, "percentage": 97.64, "elapsed_time": "0:53:10", "remaining_time": "0:01:17", "throughput": 2071.09, "total_tokens": 6607240}
|
| 3246 |
+
{"current_steps": 16135, "total_steps": 16520, "loss": 0.0009, "lr": 1.6621434092716214e-06, "epoch": 19.533898305084747, "percentage": 97.67, "elapsed_time": "0:53:11", "remaining_time": "0:01:16", "throughput": 2071.1, "total_tokens": 6609256}
|
| 3247 |
+
{"current_steps": 16140, "total_steps": 16520, "loss": 0.0014, "lr": 1.619384698374693e-06, "epoch": 19.53995157384988, "percentage": 97.7, "elapsed_time": "0:53:12", "remaining_time": "0:01:15", "throughput": 2071.18, "total_tokens": 6611528}
|
| 3248 |
+
{"current_steps": 16145, "total_steps": 16520, "loss": 0.0014, "lr": 1.5771822704299666e-06, "epoch": 19.546004842615012, "percentage": 97.73, "elapsed_time": "0:53:13", "remaining_time": "0:01:14", "throughput": 2071.22, "total_tokens": 6613576}
|
| 3249 |
+
{"current_steps": 16150, "total_steps": 16520, "loss": 0.0008, "lr": 1.5355361725428728e-06, "epoch": 19.552058111380145, "percentage": 97.76, "elapsed_time": "0:53:14", "remaining_time": "0:01:13", "throughput": 2071.27, "total_tokens": 6615656}
|
| 3250 |
+
{"current_steps": 16155, "total_steps": 16520, "loss": 0.0005, "lr": 1.494446451198117e-06, "epoch": 19.558111380145277, "percentage": 97.79, "elapsed_time": "0:53:14", "remaining_time": "0:01:12", "throughput": 2071.29, "total_tokens": 6617640}
|
| 3251 |
+
{"current_steps": 16160, "total_steps": 16520, "loss": 0.0005, "lr": 1.4539131522591786e-06, "epoch": 19.564164648910413, "percentage": 97.82, "elapsed_time": "0:53:15", "remaining_time": "0:01:11", "throughput": 2071.36, "total_tokens": 6619848}
|
| 3252 |
+
{"current_steps": 16165, "total_steps": 16520, "loss": 0.0004, "lr": 1.4139363209685895e-06, "epoch": 19.570217917675546, "percentage": 97.85, "elapsed_time": "0:53:16", "remaining_time": "0:01:10", "throughput": 2071.42, "total_tokens": 6622024}
|
| 3253 |
+
{"current_steps": 16170, "total_steps": 16520, "loss": 0.0006, "lr": 1.3745160019476565e-06, "epoch": 19.576271186440678, "percentage": 97.88, "elapsed_time": "0:53:17", "remaining_time": "0:01:09", "throughput": 2071.44, "total_tokens": 6624072}
|
| 3254 |
+
{"current_steps": 16175, "total_steps": 16520, "loss": 0.0005, "lr": 1.3356522391967385e-06, "epoch": 19.58232445520581, "percentage": 97.91, "elapsed_time": "0:53:18", "remaining_time": "0:01:08", "throughput": 2071.48, "total_tokens": 6626120}
|
| 3255 |
+
{"current_steps": 16180, "total_steps": 16520, "loss": 0.001, "lr": 1.2973450760946914e-06, "epoch": 19.588377723970943, "percentage": 97.94, "elapsed_time": "0:53:19", "remaining_time": "0:01:07", "throughput": 2071.51, "total_tokens": 6628168}
|
| 3256 |
+
{"current_steps": 16185, "total_steps": 16520, "loss": 0.0005, "lr": 1.2595945553992573e-06, "epoch": 19.59443099273608, "percentage": 97.97, "elapsed_time": "0:53:20", "remaining_time": "0:01:06", "throughput": 2071.57, "total_tokens": 6630376}
|
| 3257 |
+
{"current_steps": 16190, "total_steps": 16520, "loss": 0.0009, "lr": 1.2224007192468967e-06, "epoch": 19.60048426150121, "percentage": 98.0, "elapsed_time": "0:53:21", "remaining_time": "0:01:05", "throughput": 2071.62, "total_tokens": 6632520}
|
| 3258 |
+
{"current_steps": 16195, "total_steps": 16520, "loss": 0.0008, "lr": 1.1857636091526791e-06, "epoch": 19.606537530266344, "percentage": 98.03, "elapsed_time": "0:53:22", "remaining_time": "0:01:04", "throughput": 2071.66, "total_tokens": 6634568}
|
| 3259 |
+
{"current_steps": 16200, "total_steps": 16520, "loss": 0.0006, "lr": 1.1496832660101708e-06, "epoch": 19.612590799031477, "percentage": 98.06, "elapsed_time": "0:53:23", "remaining_time": "0:01:03", "throughput": 2071.7, "total_tokens": 6636520}
|
| 3260 |
+
{"current_steps": 16205, "total_steps": 16520, "loss": 0.0007, "lr": 1.1141597300916018e-06, "epoch": 19.61864406779661, "percentage": 98.09, "elapsed_time": "0:53:24", "remaining_time": "0:01:02", "throughput": 2071.7, "total_tokens": 6638408}
|
| 3261 |
+
{"current_steps": 16210, "total_steps": 16520, "loss": 0.0005, "lr": 1.079193041047699e-06, "epoch": 19.624697336561745, "percentage": 98.12, "elapsed_time": "0:53:25", "remaining_time": "0:01:01", "throughput": 2071.76, "total_tokens": 6640616}
|
| 3262 |
+
{"current_steps": 16215, "total_steps": 16520, "loss": 0.0014, "lr": 1.0447832379075761e-06, "epoch": 19.630750605326877, "percentage": 98.15, "elapsed_time": "0:53:26", "remaining_time": "0:01:00", "throughput": 2071.81, "total_tokens": 6642696}
|
| 3263 |
+
{"current_steps": 16220, "total_steps": 16520, "loss": 0.0005, "lr": 1.0109303590787877e-06, "epoch": 19.63680387409201, "percentage": 98.18, "elapsed_time": "0:53:27", "remaining_time": "0:00:59", "throughput": 2071.83, "total_tokens": 6644744}
|
| 3264 |
+
{"current_steps": 16225, "total_steps": 16520, "loss": 0.0007, "lr": 9.77634442347275e-07, "epoch": 19.642857142857142, "percentage": 98.21, "elapsed_time": "0:53:28", "remaining_time": "0:00:58", "throughput": 2071.88, "total_tokens": 6646888}
|
| 3265 |
+
{"current_steps": 16230, "total_steps": 16520, "loss": 0.0031, "lr": 9.44895524877365e-07, "epoch": 19.648910411622275, "percentage": 98.24, "elapsed_time": "0:53:29", "remaining_time": "0:00:57", "throughput": 2071.95, "total_tokens": 6649128}
|
| 3266 |
+
{"current_steps": 16235, "total_steps": 16520, "loss": 0.0013, "lr": 9.12713643211549e-07, "epoch": 19.65496368038741, "percentage": 98.27, "elapsed_time": "0:53:30", "remaining_time": "0:00:56", "throughput": 2072.0, "total_tokens": 6651272}
|
| 3267 |
+
{"current_steps": 16240, "total_steps": 16520, "loss": 0.0007, "lr": 8.810888332707046e-07, "epoch": 19.661016949152543, "percentage": 98.31, "elapsed_time": "0:53:30", "remaining_time": "0:00:55", "throughput": 2072.02, "total_tokens": 6653256}
|
| 3268 |
+
{"current_steps": 16245, "total_steps": 16520, "loss": 0.0008, "lr": 8.500211303537619e-07, "epoch": 19.667070217917676, "percentage": 98.34, "elapsed_time": "0:53:31", "remaining_time": "0:00:54", "throughput": 2072.05, "total_tokens": 6655240}
|
| 3269 |
+
{"current_steps": 16250, "total_steps": 16520, "loss": 0.002, "lr": 8.195105691379268e-07, "epoch": 19.673123486682808, "percentage": 98.37, "elapsed_time": "0:53:32", "remaining_time": "0:00:53", "throughput": 2072.05, "total_tokens": 6657160}
|
| 3270 |
+
{"current_steps": 16255, "total_steps": 16520, "loss": 0.0005, "lr": 7.895571836785131e-07, "epoch": 19.67917675544794, "percentage": 98.4, "elapsed_time": "0:53:33", "remaining_time": "0:00:52", "throughput": 2072.09, "total_tokens": 6659144}
|
| 3271 |
+
{"current_steps": 16260, "total_steps": 16520, "loss": 0.0012, "lr": 7.601610074088883e-07, "epoch": 19.685230024213077, "percentage": 98.43, "elapsed_time": "0:53:34", "remaining_time": "0:00:51", "throughput": 2072.11, "total_tokens": 6661160}
|
| 3272 |
+
{"current_steps": 16265, "total_steps": 16520, "loss": 0.0009, "lr": 7.313220731405835e-07, "epoch": 19.69128329297821, "percentage": 98.46, "elapsed_time": "0:53:35", "remaining_time": "0:00:50", "throughput": 2072.17, "total_tokens": 6663272}
|
| 3273 |
+
{"current_steps": 16270, "total_steps": 16520, "loss": 0.0006, "lr": 7.030404130629608e-07, "epoch": 19.69733656174334, "percentage": 98.49, "elapsed_time": "0:53:36", "remaining_time": "0:00:49", "throughput": 2072.2, "total_tokens": 6665288}
|
| 3274 |
+
{"current_steps": 16275, "total_steps": 16520, "loss": 0.0006, "lr": 6.753160587435469e-07, "epoch": 19.703389830508474, "percentage": 98.52, "elapsed_time": "0:53:37", "remaining_time": "0:00:48", "throughput": 2072.22, "total_tokens": 6667336}
|
| 3275 |
+
{"current_steps": 16280, "total_steps": 16520, "loss": 0.0012, "lr": 6.481490411277546e-07, "epoch": 19.709443099273606, "percentage": 98.55, "elapsed_time": "0:53:38", "remaining_time": "0:00:47", "throughput": 2072.25, "total_tokens": 6669288}
|
| 3276 |
+
{"current_steps": 16285, "total_steps": 16520, "loss": 0.0007, "lr": 6.215393905388278e-07, "epoch": 19.715496368038743, "percentage": 98.58, "elapsed_time": "0:53:39", "remaining_time": "0:00:46", "throughput": 2072.29, "total_tokens": 6671368}
|
| 3277 |
+
{"current_steps": 16290, "total_steps": 16520, "loss": 0.001, "lr": 5.954871366779524e-07, "epoch": 19.721549636803875, "percentage": 98.61, "elapsed_time": "0:53:40", "remaining_time": "0:00:45", "throughput": 2072.33, "total_tokens": 6673448}
|
| 3278 |
+
{"current_steps": 16295, "total_steps": 16520, "loss": 0.0019, "lr": 5.699923086242564e-07, "epoch": 19.727602905569007, "percentage": 98.64, "elapsed_time": "0:53:41", "remaining_time": "0:00:44", "throughput": 2072.35, "total_tokens": 6675432}
|
| 3279 |
+
{"current_steps": 16300, "total_steps": 16520, "loss": 0.0015, "lr": 5.450549348344213e-07, "epoch": 19.73365617433414, "percentage": 98.67, "elapsed_time": "0:53:42", "remaining_time": "0:00:43", "throughput": 2072.39, "total_tokens": 6677384}
|
| 3280 |
+
{"current_steps": 16305, "total_steps": 16520, "loss": 0.0007, "lr": 5.206750431432372e-07, "epoch": 19.739709443099272, "percentage": 98.7, "elapsed_time": "0:53:43", "remaining_time": "0:00:42", "throughput": 2072.4, "total_tokens": 6679400}
|
| 3281 |
+
{"current_steps": 16310, "total_steps": 16520, "loss": 0.0014, "lr": 4.968526607629364e-07, "epoch": 19.74576271186441, "percentage": 98.73, "elapsed_time": "0:53:43", "remaining_time": "0:00:41", "throughput": 2072.44, "total_tokens": 6681512}
|
| 3282 |
+
{"current_steps": 16315, "total_steps": 16520, "loss": 0.0009, "lr": 4.735878142836936e-07, "epoch": 19.75181598062954, "percentage": 98.76, "elapsed_time": "0:53:44", "remaining_time": "0:00:40", "throughput": 2072.48, "total_tokens": 6683624}
|
| 3283 |
+
{"current_steps": 16320, "total_steps": 16520, "loss": 0.0006, "lr": 4.508805296732921e-07, "epoch": 19.757869249394673, "percentage": 98.79, "elapsed_time": "0:53:45", "remaining_time": "0:00:39", "throughput": 2072.51, "total_tokens": 6685672}
|
| 3284 |
+
{"current_steps": 16325, "total_steps": 16520, "loss": 0.0062, "lr": 4.2873083227706887e-07, "epoch": 19.763922518159806, "percentage": 98.82, "elapsed_time": "0:53:46", "remaining_time": "0:00:38", "throughput": 2072.56, "total_tokens": 6687816}
|
| 3285 |
+
{"current_steps": 16330, "total_steps": 16520, "loss": 0.0005, "lr": 4.07138746818192e-07, "epoch": 19.769975786924938, "percentage": 98.85, "elapsed_time": "0:53:47", "remaining_time": "0:00:37", "throughput": 2072.57, "total_tokens": 6689768}
|
| 3286 |
+
{"current_steps": 16335, "total_steps": 16520, "loss": 0.001, "lr": 3.8610429739732724e-07, "epoch": 19.776029055690074, "percentage": 98.88, "elapsed_time": "0:53:48", "remaining_time": "0:00:36", "throughput": 2072.62, "total_tokens": 6691848}
|
| 3287 |
+
{"current_steps": 16340, "total_steps": 16520, "loss": 0.0015, "lr": 3.65627507492694e-07, "epoch": 19.782082324455207, "percentage": 98.91, "elapsed_time": "0:53:49", "remaining_time": "0:00:35", "throughput": 2072.64, "total_tokens": 6693896}
|
| 3288 |
+
{"current_steps": 16345, "total_steps": 16520, "loss": 0.0005, "lr": 3.457083999601207e-07, "epoch": 19.78813559322034, "percentage": 98.94, "elapsed_time": "0:53:50", "remaining_time": "0:00:34", "throughput": 2072.7, "total_tokens": 6696104}
|
| 3289 |
+
{"current_steps": 16350, "total_steps": 16520, "loss": 0.0005, "lr": 3.2634699703293356e-07, "epoch": 19.79418886198547, "percentage": 98.97, "elapsed_time": "0:53:51", "remaining_time": "0:00:33", "throughput": 2072.74, "total_tokens": 6698088}
|
| 3290 |
+
{"current_steps": 16355, "total_steps": 16520, "loss": 0.0006, "lr": 3.075433203219569e-07, "epoch": 19.800242130750604, "percentage": 99.0, "elapsed_time": "0:53:52", "remaining_time": "0:00:32", "throughput": 2072.77, "total_tokens": 6700168}
|
| 3291 |
+
{"current_steps": 16360, "total_steps": 16520, "loss": 0.0012, "lr": 2.8929739081545724e-07, "epoch": 19.80629539951574, "percentage": 99.03, "elapsed_time": "0:53:53", "remaining_time": "0:00:31", "throughput": 2072.81, "total_tokens": 6702280}
|
| 3292 |
+
{"current_steps": 16365, "total_steps": 16520, "loss": 0.0025, "lr": 2.7160922887925486e-07, "epoch": 19.812348668280872, "percentage": 99.06, "elapsed_time": "0:53:54", "remaining_time": "0:00:30", "throughput": 2072.86, "total_tokens": 6704360}
|
| 3293 |
+
{"current_steps": 16370, "total_steps": 16520, "loss": 0.0006, "lr": 2.544788542565013e-07, "epoch": 19.818401937046005, "percentage": 99.09, "elapsed_time": "0:53:55", "remaining_time": "0:00:29", "throughput": 2072.88, "total_tokens": 6706408}
|
| 3294 |
+
{"current_steps": 16375, "total_steps": 16520, "loss": 0.0005, "lr": 2.3790628606779053e-07, "epoch": 19.824455205811137, "percentage": 99.12, "elapsed_time": "0:53:56", "remaining_time": "0:00:28", "throughput": 2072.89, "total_tokens": 6708328}
|
| 3295 |
+
{"current_steps": 16380, "total_steps": 16520, "loss": 0.0003, "lr": 2.2189154281115898e-07, "epoch": 19.83050847457627, "percentage": 99.15, "elapsed_time": "0:53:57", "remaining_time": "0:00:27", "throughput": 2072.9, "total_tokens": 6710312}
|
| 3296 |
+
{"current_steps": 16385, "total_steps": 16520, "loss": 0.0003, "lr": 2.06434642361919e-07, "epoch": 19.836561743341406, "percentage": 99.18, "elapsed_time": "0:53:58", "remaining_time": "0:00:26", "throughput": 2072.92, "total_tokens": 6712360}
|
| 3297 |
+
{"current_steps": 16390, "total_steps": 16520, "loss": 0.0017, "lr": 1.9153560197276986e-07, "epoch": 19.84261501210654, "percentage": 99.21, "elapsed_time": "0:53:59", "remaining_time": "0:00:25", "throughput": 2072.94, "total_tokens": 6714344}
|
| 3298 |
+
{"current_steps": 16395, "total_steps": 16520, "loss": 0.002, "lr": 1.7719443827368674e-07, "epoch": 19.84866828087167, "percentage": 99.24, "elapsed_time": "0:54:00", "remaining_time": "0:00:24", "throughput": 2073.02, "total_tokens": 6716616}
|
| 3299 |
+
{"current_steps": 16400, "total_steps": 16520, "loss": 0.0006, "lr": 1.6341116727214277e-07, "epoch": 19.854721549636803, "percentage": 99.27, "elapsed_time": "0:54:00", "remaining_time": "0:00:23", "throughput": 2073.07, "total_tokens": 6718728}
|
| 3300 |
+
{"current_steps": 16405, "total_steps": 16520, "loss": 0.0004, "lr": 1.5018580435260944e-07, "epoch": 19.860774818401936, "percentage": 99.3, "elapsed_time": "0:54:01", "remaining_time": "0:00:22", "throughput": 2073.09, "total_tokens": 6720712}
|
| 3301 |
+
{"current_steps": 16410, "total_steps": 16520, "loss": 0.0002, "lr": 1.375183642770561e-07, "epoch": 19.86682808716707, "percentage": 99.33, "elapsed_time": "0:54:02", "remaining_time": "0:00:21", "throughput": 2073.1, "total_tokens": 6722632}
|
| 3302 |
+
{"current_steps": 16415, "total_steps": 16520, "loss": 0.0005, "lr": 1.2540886118467264e-07, "epoch": 19.872881355932204, "percentage": 99.36, "elapsed_time": "0:54:03", "remaining_time": "0:00:20", "throughput": 2073.13, "total_tokens": 6724648}
|
| 3303 |
+
{"current_steps": 16420, "total_steps": 16520, "loss": 0.0005, "lr": 1.1385730859181376e-07, "epoch": 19.878934624697337, "percentage": 99.39, "elapsed_time": "0:54:04", "remaining_time": "0:00:19", "throughput": 2073.16, "total_tokens": 6726664}
|
| 3304 |
+
{"current_steps": 16425, "total_steps": 16520, "loss": 0.0006, "lr": 1.0286371939205452e-07, "epoch": 19.88498789346247, "percentage": 99.42, "elapsed_time": "0:54:05", "remaining_time": "0:00:18", "throughput": 2073.18, "total_tokens": 6728552}
|
| 3305 |
+
{"current_steps": 16430, "total_steps": 16520, "loss": 0.0004, "lr": 9.242810585630146e-08, "epoch": 19.8910411622276, "percentage": 99.46, "elapsed_time": "0:54:06", "remaining_time": "0:00:17", "throughput": 2073.18, "total_tokens": 6730536}
|
| 3306 |
+
{"current_steps": 16435, "total_steps": 16520, "loss": 0.0011, "lr": 8.255047963262596e-08, "epoch": 19.897094430992738, "percentage": 99.49, "elapsed_time": "0:54:07", "remaining_time": "0:00:16", "throughput": 2073.21, "total_tokens": 6732552}
|
| 3307 |
+
{"current_steps": 16440, "total_steps": 16520, "loss": 0.0007, "lr": 7.323085174609778e-08, "epoch": 19.90314769975787, "percentage": 99.52, "elapsed_time": "0:54:08", "remaining_time": "0:00:15", "throughput": 2073.24, "total_tokens": 6734536}
|
| 3308 |
+
{"current_steps": 16445, "total_steps": 16520, "loss": 0.0032, "lr": 6.446923259928461e-08, "epoch": 19.909200968523002, "percentage": 99.55, "elapsed_time": "0:54:09", "remaining_time": "0:00:14", "throughput": 2073.29, "total_tokens": 6736680}
|
| 3309 |
+
{"current_steps": 16450, "total_steps": 16520, "loss": 0.0014, "lr": 5.626563197158596e-08, "epoch": 19.915254237288135, "percentage": 99.58, "elapsed_time": "0:54:10", "remaining_time": "0:00:13", "throughput": 2073.31, "total_tokens": 6738728}
|
| 3310 |
+
{"current_steps": 16455, "total_steps": 16520, "loss": 0.0006, "lr": 4.8620059019788275e-08, "epoch": 19.921307506053267, "percentage": 99.61, "elapsed_time": "0:54:11", "remaining_time": "0:00:12", "throughput": 2073.32, "total_tokens": 6740712}
|
| 3311 |
+
{"current_steps": 16460, "total_steps": 16520, "loss": 0.0013, "lr": 4.153252227773185e-08, "epoch": 19.927360774818403, "percentage": 99.64, "elapsed_time": "0:54:12", "remaining_time": "0:00:11", "throughput": 2073.35, "total_tokens": 6742664}
|
| 3312 |
+
{"current_steps": 16465, "total_steps": 16520, "loss": 0.0006, "lr": 3.500302965636637e-08, "epoch": 19.933414043583536, "percentage": 99.67, "elapsed_time": "0:54:12", "remaining_time": "0:00:10", "throughput": 2073.39, "total_tokens": 6744744}
|
| 3313 |
+
{"current_steps": 16470, "total_steps": 16520, "loss": 0.0102, "lr": 2.9031588443750866e-08, "epoch": 19.93946731234867, "percentage": 99.7, "elapsed_time": "0:54:13", "remaining_time": "0:00:09", "throughput": 2073.42, "total_tokens": 6746728}
|
| 3314 |
+
{"current_steps": 16475, "total_steps": 16520, "loss": 0.0014, "lr": 2.3618205305220298e-08, "epoch": 19.9455205811138, "percentage": 99.73, "elapsed_time": "0:54:14", "remaining_time": "0:00:08", "throughput": 2073.44, "total_tokens": 6748776}
|
| 3315 |
+
{"current_steps": 16480, "total_steps": 16520, "loss": 0.0005, "lr": 1.8762886282941427e-08, "epoch": 19.951573849878933, "percentage": 99.76, "elapsed_time": "0:54:15", "remaining_time": "0:00:07", "throughput": 2073.48, "total_tokens": 6750888}
|
| 3316 |
+
{"current_steps": 16485, "total_steps": 16520, "loss": 0.0008, "lr": 1.446563679641244e-08, "epoch": 19.95762711864407, "percentage": 99.79, "elapsed_time": "0:54:16", "remaining_time": "0:00:06", "throughput": 2073.52, "total_tokens": 6752936}
|
| 3317 |
+
{"current_steps": 16490, "total_steps": 16520, "loss": 0.0027, "lr": 1.072646164212987e-08, "epoch": 19.9636803874092, "percentage": 99.82, "elapsed_time": "0:54:17", "remaining_time": "0:00:05", "throughput": 2073.56, "total_tokens": 6754984}
|
| 3318 |
+
{"current_steps": 16495, "total_steps": 16520, "loss": 0.0014, "lr": 7.545364993699621e-09, "epoch": 19.969733656174334, "percentage": 99.85, "elapsed_time": "0:54:18", "remaining_time": "0:00:04", "throughput": 2073.61, "total_tokens": 6757128}
|
| 3319 |
+
{"current_steps": 16500, "total_steps": 16520, "loss": 0.0005, "lr": 4.922350401781461e-09, "epoch": 19.975786924939467, "percentage": 99.88, "elapsed_time": "0:54:19", "remaining_time": "0:00:03", "throughput": 2073.62, "total_tokens": 6759080}
|
| 3320 |
+
{"current_steps": 16505, "total_steps": 16520, "loss": 0.008, "lr": 2.8574207941445275e-09, "epoch": 19.9818401937046, "percentage": 99.91, "elapsed_time": "0:54:20", "remaining_time": "0:00:02", "throughput": 2073.66, "total_tokens": 6761128}
|
| 3321 |
+
{"current_steps": 16510, "total_steps": 16520, "loss": 0.0013, "lr": 1.350578475611819e-09, "epoch": 19.987893462469735, "percentage": 99.94, "elapsed_time": "0:54:21", "remaining_time": "0:00:01", "throughput": 2073.7, "total_tokens": 6763240}
|
| 3322 |
+
{"current_steps": 16515, "total_steps": 16520, "loss": 0.0016, "lr": 4.0182512806019587e-10, "epoch": 19.993946731234868, "percentage": 99.97, "elapsed_time": "0:54:22", "remaining_time": "0:00:00", "throughput": 2073.72, "total_tokens": 6765288}
|
| 3323 |
+
{"current_steps": 16520, "total_steps": 16520, "loss": 0.0005, "lr": 1.116181058691268e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:23", "remaining_time": "0:00:00", "throughput": 2073.71, "total_tokens": 6767120}
|
| 3324 |
+
{"current_steps": 16520, "total_steps": 16520, "eval_loss": 0.3815227448940277, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:31", "remaining_time": "0:00:00", "throughput": 2068.77, "total_tokens": 6767120}
|
| 3325 |
+
{"current_steps": 16520, "total_steps": 16520, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:54:32", "remaining_time": "0:00:00", "throughput": 2068.13, "total_tokens": 6767120}
|