Training in progress, step 32436
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +382 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:864b3df4bdd85730559bdc46f9b309a4ae65a7362780b5dffcf4d3f57a1d0ce1
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -6126,3 +6126,385 @@
|
|
| 6126 |
{"current_steps": 30550, "total_steps": 38150, "loss": 0.1167, "lr": 5.806316280760551e-06, "epoch": 8.00786369593709, "percentage": 80.08, "elapsed_time": "0:50:41", "remaining_time": "0:12:36", "throughput": 2693.3, "total_tokens": 8191440}
|
| 6127 |
{"current_steps": 30555, "total_steps": 38150, "loss": 0.1686, "lr": 5.7989898203433725e-06, "epoch": 8.009174311926605, "percentage": 80.09, "elapsed_time": "0:50:41", "remaining_time": "0:12:36", "throughput": 2693.38, "total_tokens": 8192976}
|
| 6128 |
{"current_steps": 30560, "total_steps": 38150, "loss": 0.1005, "lr": 5.791667378664451e-06, "epoch": 8.01048492791612, "percentage": 80.1, "elapsed_time": "0:50:42", "remaining_time": "0:12:35", "throughput": 2693.46, "total_tokens": 8194544}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6126 |
{"current_steps": 30550, "total_steps": 38150, "loss": 0.1167, "lr": 5.806316280760551e-06, "epoch": 8.00786369593709, "percentage": 80.08, "elapsed_time": "0:50:41", "remaining_time": "0:12:36", "throughput": 2693.3, "total_tokens": 8191440}
|
| 6127 |
{"current_steps": 30555, "total_steps": 38150, "loss": 0.1686, "lr": 5.7989898203433725e-06, "epoch": 8.009174311926605, "percentage": 80.09, "elapsed_time": "0:50:41", "remaining_time": "0:12:36", "throughput": 2693.38, "total_tokens": 8192976}
|
| 6128 |
{"current_steps": 30560, "total_steps": 38150, "loss": 0.1005, "lr": 5.791667378664451e-06, "epoch": 8.01048492791612, "percentage": 80.1, "elapsed_time": "0:50:42", "remaining_time": "0:12:35", "throughput": 2693.46, "total_tokens": 8194544}
|
| 6129 |
+
{"current_steps": 30565, "total_steps": 38150, "loss": 0.1504, "lr": 5.784348957256352e-06, "epoch": 8.011795543905636, "percentage": 80.12, "elapsed_time": "0:50:42", "remaining_time": "0:12:35", "throughput": 2693.47, "total_tokens": 8195728}
|
| 6130 |
+
{"current_steps": 30570, "total_steps": 38150, "loss": 0.154, "lr": 5.7770345576508176e-06, "epoch": 8.01310615989515, "percentage": 80.13, "elapsed_time": "0:50:43", "remaining_time": "0:12:34", "throughput": 2693.5, "total_tokens": 8196992}
|
| 6131 |
+
{"current_steps": 30575, "total_steps": 38150, "loss": 0.1266, "lr": 5.769724181378736e-06, "epoch": 8.014416775884666, "percentage": 80.14, "elapsed_time": "0:50:43", "remaining_time": "0:12:34", "throughput": 2693.59, "total_tokens": 8198544}
|
| 6132 |
+
{"current_steps": 30580, "total_steps": 38150, "loss": 0.1359, "lr": 5.7624178299701546e-06, "epoch": 8.015727391874181, "percentage": 80.16, "elapsed_time": "0:50:44", "remaining_time": "0:12:33", "throughput": 2693.55, "total_tokens": 8199504}
|
| 6133 |
+
{"current_steps": 30585, "total_steps": 38150, "loss": 0.168, "lr": 5.755115504954284e-06, "epoch": 8.017038007863697, "percentage": 80.17, "elapsed_time": "0:50:44", "remaining_time": "0:12:33", "throughput": 2693.59, "total_tokens": 8200864}
|
| 6134 |
+
{"current_steps": 30590, "total_steps": 38150, "loss": 0.2716, "lr": 5.747817207859491e-06, "epoch": 8.01834862385321, "percentage": 80.18, "elapsed_time": "0:50:45", "remaining_time": "0:12:32", "throughput": 2693.59, "total_tokens": 8202000}
|
| 6135 |
+
{"current_steps": 30595, "total_steps": 38150, "loss": 0.1556, "lr": 5.740522940213283e-06, "epoch": 8.019659239842726, "percentage": 80.2, "elapsed_time": "0:50:45", "remaining_time": "0:12:32", "throughput": 2693.61, "total_tokens": 8203200}
|
| 6136 |
+
{"current_steps": 30600, "total_steps": 38150, "loss": 0.2044, "lr": 5.733232703542346e-06, "epoch": 8.020969855832242, "percentage": 80.21, "elapsed_time": "0:50:45", "remaining_time": "0:12:31", "throughput": 2693.64, "total_tokens": 8204480}
|
| 6137 |
+
{"current_steps": 30605, "total_steps": 38150, "loss": 0.1213, "lr": 5.725946499372506e-06, "epoch": 8.022280471821757, "percentage": 80.22, "elapsed_time": "0:50:46", "remaining_time": "0:12:31", "throughput": 2693.62, "total_tokens": 8205616}
|
| 6138 |
+
{"current_steps": 30610, "total_steps": 38150, "loss": 0.1804, "lr": 5.7186643292287705e-06, "epoch": 8.02359108781127, "percentage": 80.24, "elapsed_time": "0:50:47", "remaining_time": "0:12:30", "throughput": 2693.82, "total_tokens": 8208176}
|
| 6139 |
+
{"current_steps": 30615, "total_steps": 38150, "loss": 0.1389, "lr": 5.711386194635274e-06, "epoch": 8.024901703800786, "percentage": 80.25, "elapsed_time": "0:50:47", "remaining_time": "0:12:30", "throughput": 2693.78, "total_tokens": 8209168}
|
| 6140 |
+
{"current_steps": 30620, "total_steps": 38150, "loss": 0.188, "lr": 5.704112097115316e-06, "epoch": 8.026212319790302, "percentage": 80.26, "elapsed_time": "0:50:47", "remaining_time": "0:12:29", "throughput": 2693.82, "total_tokens": 8210464}
|
| 6141 |
+
{"current_steps": 30625, "total_steps": 38150, "loss": 0.1101, "lr": 5.696842038191366e-06, "epoch": 8.027522935779816, "percentage": 80.28, "elapsed_time": "0:50:48", "remaining_time": "0:12:29", "throughput": 2693.82, "total_tokens": 8211616}
|
| 6142 |
+
{"current_steps": 30630, "total_steps": 38150, "loss": 0.1572, "lr": 5.689576019385015e-06, "epoch": 8.028833551769331, "percentage": 80.29, "elapsed_time": "0:50:48", "remaining_time": "0:12:28", "throughput": 2693.85, "total_tokens": 8212832}
|
| 6143 |
+
{"current_steps": 30635, "total_steps": 38150, "loss": 0.2468, "lr": 5.682314042217041e-06, "epoch": 8.030144167758847, "percentage": 80.3, "elapsed_time": "0:50:49", "remaining_time": "0:12:27", "throughput": 2693.87, "total_tokens": 8214048}
|
| 6144 |
+
{"current_steps": 30640, "total_steps": 38150, "loss": 0.1044, "lr": 5.675056108207355e-06, "epoch": 8.031454783748362, "percentage": 80.31, "elapsed_time": "0:50:49", "remaining_time": "0:12:27", "throughput": 2693.91, "total_tokens": 8215328}
|
| 6145 |
+
{"current_steps": 30645, "total_steps": 38150, "loss": 0.2197, "lr": 5.667802218875037e-06, "epoch": 8.032765399737876, "percentage": 80.33, "elapsed_time": "0:50:50", "remaining_time": "0:12:26", "throughput": 2693.96, "total_tokens": 8216720}
|
| 6146 |
+
{"current_steps": 30650, "total_steps": 38150, "loss": 0.1888, "lr": 5.66055237573831e-06, "epoch": 8.034076015727392, "percentage": 80.34, "elapsed_time": "0:50:50", "remaining_time": "0:12:26", "throughput": 2694.02, "total_tokens": 8218112}
|
| 6147 |
+
{"current_steps": 30655, "total_steps": 38150, "loss": 0.1013, "lr": 5.6533065803145545e-06, "epoch": 8.035386631716907, "percentage": 80.35, "elapsed_time": "0:50:50", "remaining_time": "0:12:25", "throughput": 2694.08, "total_tokens": 8219552}
|
| 6148 |
+
{"current_steps": 30660, "total_steps": 38150, "loss": 0.1647, "lr": 5.646064834120304e-06, "epoch": 8.036697247706423, "percentage": 80.37, "elapsed_time": "0:50:51", "remaining_time": "0:12:25", "throughput": 2694.19, "total_tokens": 8221136}
|
| 6149 |
+
{"current_steps": 30665, "total_steps": 38150, "loss": 0.1978, "lr": 5.638827138671243e-06, "epoch": 8.038007863695936, "percentage": 80.38, "elapsed_time": "0:50:51", "remaining_time": "0:12:24", "throughput": 2694.24, "total_tokens": 8222496}
|
| 6150 |
+
{"current_steps": 30670, "total_steps": 38150, "loss": 0.0835, "lr": 5.631593495482209e-06, "epoch": 8.039318479685452, "percentage": 80.39, "elapsed_time": "0:50:52", "remaining_time": "0:12:24", "throughput": 2694.36, "total_tokens": 8224272}
|
| 6151 |
+
{"current_steps": 30675, "total_steps": 38150, "loss": 0.0944, "lr": 5.62436390606719e-06, "epoch": 8.040629095674968, "percentage": 80.41, "elapsed_time": "0:50:52", "remaining_time": "0:12:23", "throughput": 2694.42, "total_tokens": 8225632}
|
| 6152 |
+
{"current_steps": 30680, "total_steps": 38150, "loss": 0.1299, "lr": 5.617138371939326e-06, "epoch": 8.041939711664483, "percentage": 80.42, "elapsed_time": "0:50:53", "remaining_time": "0:12:23", "throughput": 2694.46, "total_tokens": 8226928}
|
| 6153 |
+
{"current_steps": 30685, "total_steps": 38150, "loss": 0.1827, "lr": 5.6099168946109114e-06, "epoch": 8.043250327653997, "percentage": 80.43, "elapsed_time": "0:50:53", "remaining_time": "0:12:22", "throughput": 2694.46, "total_tokens": 8228064}
|
| 6154 |
+
{"current_steps": 30690, "total_steps": 38150, "loss": 0.1365, "lr": 5.602699475593387e-06, "epoch": 8.044560943643512, "percentage": 80.45, "elapsed_time": "0:50:54", "remaining_time": "0:12:22", "throughput": 2694.45, "total_tokens": 8229168}
|
| 6155 |
+
{"current_steps": 30695, "total_steps": 38150, "loss": 0.1354, "lr": 5.595486116397344e-06, "epoch": 8.045871559633028, "percentage": 80.46, "elapsed_time": "0:50:54", "remaining_time": "0:12:21", "throughput": 2694.46, "total_tokens": 8230336}
|
| 6156 |
+
{"current_steps": 30700, "total_steps": 38150, "loss": 0.1573, "lr": 5.588276818532531e-06, "epoch": 8.047182175622543, "percentage": 80.47, "elapsed_time": "0:50:54", "remaining_time": "0:12:21", "throughput": 2694.44, "total_tokens": 8231408}
|
| 6157 |
+
{"current_steps": 30705, "total_steps": 38150, "loss": 0.1367, "lr": 5.5810715835078434e-06, "epoch": 8.048492791612057, "percentage": 80.48, "elapsed_time": "0:50:55", "remaining_time": "0:12:20", "throughput": 2694.51, "total_tokens": 8232784}
|
| 6158 |
+
{"current_steps": 30710, "total_steps": 38150, "loss": 0.1698, "lr": 5.573870412831314e-06, "epoch": 8.049803407601573, "percentage": 80.5, "elapsed_time": "0:50:55", "remaining_time": "0:12:20", "throughput": 2694.6, "total_tokens": 8234336}
|
| 6159 |
+
{"current_steps": 30715, "total_steps": 38150, "loss": 0.129, "lr": 5.566673308010142e-06, "epoch": 8.051114023591088, "percentage": 80.51, "elapsed_time": "0:50:56", "remaining_time": "0:12:19", "throughput": 2694.59, "total_tokens": 8235392}
|
| 6160 |
+
{"current_steps": 30720, "total_steps": 38150, "loss": 0.187, "lr": 5.5594802705506626e-06, "epoch": 8.052424639580602, "percentage": 80.52, "elapsed_time": "0:50:56", "remaining_time": "0:12:19", "throughput": 2694.61, "total_tokens": 8236608}
|
| 6161 |
+
{"current_steps": 30725, "total_steps": 38150, "loss": 0.1827, "lr": 5.552291301958379e-06, "epoch": 8.053735255570118, "percentage": 80.54, "elapsed_time": "0:50:57", "remaining_time": "0:12:18", "throughput": 2694.68, "total_tokens": 8238096}
|
| 6162 |
+
{"current_steps": 30730, "total_steps": 38150, "loss": 0.1404, "lr": 5.545106403737921e-06, "epoch": 8.055045871559633, "percentage": 80.55, "elapsed_time": "0:50:57", "remaining_time": "0:12:18", "throughput": 2694.68, "total_tokens": 8239280}
|
| 6163 |
+
{"current_steps": 30735, "total_steps": 38150, "loss": 0.1781, "lr": 5.537925577393077e-06, "epoch": 8.056356487549149, "percentage": 80.56, "elapsed_time": "0:50:58", "remaining_time": "0:12:17", "throughput": 2694.68, "total_tokens": 8240448}
|
| 6164 |
+
{"current_steps": 30740, "total_steps": 38150, "loss": 0.1619, "lr": 5.530748824426782e-06, "epoch": 8.057667103538662, "percentage": 80.58, "elapsed_time": "0:50:58", "remaining_time": "0:12:17", "throughput": 2694.66, "total_tokens": 8241504}
|
| 6165 |
+
{"current_steps": 30745, "total_steps": 38150, "loss": 0.0968, "lr": 5.523576146341128e-06, "epoch": 8.058977719528178, "percentage": 80.59, "elapsed_time": "0:50:58", "remaining_time": "0:12:16", "throughput": 2694.71, "total_tokens": 8242784}
|
| 6166 |
+
{"current_steps": 30750, "total_steps": 38150, "loss": 0.1786, "lr": 5.5164075446373225e-06, "epoch": 8.060288335517694, "percentage": 80.6, "elapsed_time": "0:50:59", "remaining_time": "0:12:16", "throughput": 2694.67, "total_tokens": 8243760}
|
| 6167 |
+
{"current_steps": 30755, "total_steps": 38150, "loss": 0.2466, "lr": 5.509243020815755e-06, "epoch": 8.061598951507209, "percentage": 80.62, "elapsed_time": "0:50:59", "remaining_time": "0:12:15", "throughput": 2694.86, "total_tokens": 8245888}
|
| 6168 |
+
{"current_steps": 30760, "total_steps": 38150, "loss": 0.1369, "lr": 5.502082576375947e-06, "epoch": 8.062909567496723, "percentage": 80.63, "elapsed_time": "0:51:00", "remaining_time": "0:12:15", "throughput": 2694.82, "total_tokens": 8246896}
|
| 6169 |
+
{"current_steps": 30765, "total_steps": 38150, "loss": 0.182, "lr": 5.494926212816562e-06, "epoch": 8.064220183486238, "percentage": 80.64, "elapsed_time": "0:51:00", "remaining_time": "0:12:14", "throughput": 2694.96, "total_tokens": 8248704}
|
| 6170 |
+
{"current_steps": 30770, "total_steps": 38150, "loss": 0.1829, "lr": 5.4877739316354236e-06, "epoch": 8.065530799475754, "percentage": 80.66, "elapsed_time": "0:51:01", "remaining_time": "0:12:14", "throughput": 2695.08, "total_tokens": 8250368}
|
| 6171 |
+
{"current_steps": 30775, "total_steps": 38150, "loss": 0.1571, "lr": 5.4806257343294755e-06, "epoch": 8.06684141546527, "percentage": 80.67, "elapsed_time": "0:51:01", "remaining_time": "0:12:13", "throughput": 2695.19, "total_tokens": 8252000}
|
| 6172 |
+
{"current_steps": 30780, "total_steps": 38150, "loss": 0.1158, "lr": 5.473481622394849e-06, "epoch": 8.068152031454783, "percentage": 80.68, "elapsed_time": "0:51:02", "remaining_time": "0:12:13", "throughput": 2695.2, "total_tokens": 8253248}
|
| 6173 |
+
{"current_steps": 30785, "total_steps": 38150, "loss": 0.145, "lr": 5.466341597326774e-06, "epoch": 8.069462647444299, "percentage": 80.69, "elapsed_time": "0:51:02", "remaining_time": "0:12:12", "throughput": 2695.2, "total_tokens": 8254400}
|
| 6174 |
+
{"current_steps": 30790, "total_steps": 38150, "loss": 0.1718, "lr": 5.459205660619651e-06, "epoch": 8.070773263433814, "percentage": 80.71, "elapsed_time": "0:51:03", "remaining_time": "0:12:12", "throughput": 2695.24, "total_tokens": 8255648}
|
| 6175 |
+
{"current_steps": 30795, "total_steps": 38150, "loss": 0.1564, "lr": 5.452073813767019e-06, "epoch": 8.07208387942333, "percentage": 80.72, "elapsed_time": "0:51:03", "remaining_time": "0:12:11", "throughput": 2695.28, "total_tokens": 8256928}
|
| 6176 |
+
{"current_steps": 30800, "total_steps": 38150, "loss": 0.1408, "lr": 5.444946058261563e-06, "epoch": 8.073394495412844, "percentage": 80.73, "elapsed_time": "0:51:03", "remaining_time": "0:12:11", "throughput": 2695.31, "total_tokens": 8258368}
|
| 6177 |
+
{"current_steps": 30805, "total_steps": 38150, "loss": 0.2378, "lr": 5.4378223955951074e-06, "epoch": 8.07470511140236, "percentage": 80.75, "elapsed_time": "0:51:04", "remaining_time": "0:12:10", "throughput": 2695.56, "total_tokens": 8260864}
|
| 6178 |
+
{"current_steps": 30810, "total_steps": 38150, "loss": 0.1214, "lr": 5.430702827258622e-06, "epoch": 8.076015727391875, "percentage": 80.76, "elapsed_time": "0:51:05", "remaining_time": "0:12:10", "throughput": 2695.56, "total_tokens": 8261984}
|
| 6179 |
+
{"current_steps": 30815, "total_steps": 38150, "loss": 0.0946, "lr": 5.423587354742224e-06, "epoch": 8.077326343381388, "percentage": 80.77, "elapsed_time": "0:51:05", "remaining_time": "0:12:09", "throughput": 2695.63, "total_tokens": 8263424}
|
| 6180 |
+
{"current_steps": 30820, "total_steps": 38150, "loss": 0.0879, "lr": 5.4164759795351655e-06, "epoch": 8.078636959370904, "percentage": 80.79, "elapsed_time": "0:51:05", "remaining_time": "0:12:09", "throughput": 2695.66, "total_tokens": 8264672}
|
| 6181 |
+
{"current_steps": 30825, "total_steps": 38150, "loss": 0.1413, "lr": 5.4093687031258554e-06, "epoch": 8.07994757536042, "percentage": 80.8, "elapsed_time": "0:51:06", "remaining_time": "0:12:08", "throughput": 2695.74, "total_tokens": 8266272}
|
| 6182 |
+
{"current_steps": 30830, "total_steps": 38150, "loss": 0.149, "lr": 5.402265527001818e-06, "epoch": 8.081258191349935, "percentage": 80.81, "elapsed_time": "0:51:06", "remaining_time": "0:12:08", "throughput": 2695.71, "total_tokens": 8267296}
|
| 6183 |
+
{"current_steps": 30835, "total_steps": 38150, "loss": 0.2399, "lr": 5.3951664526497356e-06, "epoch": 8.082568807339449, "percentage": 80.83, "elapsed_time": "0:51:07", "remaining_time": "0:12:07", "throughput": 2695.75, "total_tokens": 8268656}
|
| 6184 |
+
{"current_steps": 30840, "total_steps": 38150, "loss": 0.1154, "lr": 5.388071481555448e-06, "epoch": 8.083879423328964, "percentage": 80.84, "elapsed_time": "0:51:07", "remaining_time": "0:12:07", "throughput": 2695.77, "total_tokens": 8269904}
|
| 6185 |
+
{"current_steps": 30845, "total_steps": 38150, "loss": 0.1591, "lr": 5.3809806152039125e-06, "epoch": 8.08519003931848, "percentage": 80.85, "elapsed_time": "0:51:08", "remaining_time": "0:12:06", "throughput": 2695.77, "total_tokens": 8270992}
|
| 6186 |
+
{"current_steps": 30850, "total_steps": 38150, "loss": 0.1791, "lr": 5.373893855079235e-06, "epoch": 8.086500655307995, "percentage": 80.87, "elapsed_time": "0:51:08", "remaining_time": "0:12:06", "throughput": 2695.81, "total_tokens": 8272352}
|
| 6187 |
+
{"current_steps": 30855, "total_steps": 38150, "loss": 0.1697, "lr": 5.366811202664662e-06, "epoch": 8.08781127129751, "percentage": 80.88, "elapsed_time": "0:51:09", "remaining_time": "0:12:05", "throughput": 2695.8, "total_tokens": 8273424}
|
| 6188 |
+
{"current_steps": 30860, "total_steps": 38150, "loss": 0.1959, "lr": 5.35973265944259e-06, "epoch": 8.089121887287025, "percentage": 80.89, "elapsed_time": "0:51:09", "remaining_time": "0:12:05", "throughput": 2695.89, "total_tokens": 8274976}
|
| 6189 |
+
{"current_steps": 30865, "total_steps": 38150, "loss": 0.1756, "lr": 5.352658226894527e-06, "epoch": 8.09043250327654, "percentage": 80.9, "elapsed_time": "0:51:09", "remaining_time": "0:12:04", "throughput": 2695.95, "total_tokens": 8276384}
|
| 6190 |
+
{"current_steps": 30870, "total_steps": 38150, "loss": 0.1193, "lr": 5.345587906501148e-06, "epoch": 8.091743119266056, "percentage": 80.92, "elapsed_time": "0:51:10", "remaining_time": "0:12:04", "throughput": 2696.03, "total_tokens": 8277920}
|
| 6191 |
+
{"current_steps": 30875, "total_steps": 38150, "loss": 0.1921, "lr": 5.338521699742263e-06, "epoch": 8.09305373525557, "percentage": 80.93, "elapsed_time": "0:51:10", "remaining_time": "0:12:03", "throughput": 2696.11, "total_tokens": 8279488}
|
| 6192 |
+
{"current_steps": 30880, "total_steps": 38150, "loss": 0.1919, "lr": 5.331459608096815e-06, "epoch": 8.094364351245085, "percentage": 80.94, "elapsed_time": "0:51:11", "remaining_time": "0:12:03", "throughput": 2696.17, "total_tokens": 8280912}
|
| 6193 |
+
{"current_steps": 30885, "total_steps": 38150, "loss": 0.2094, "lr": 5.324401633042883e-06, "epoch": 8.0956749672346, "percentage": 80.96, "elapsed_time": "0:51:11", "remaining_time": "0:12:02", "throughput": 2696.16, "total_tokens": 8282016}
|
| 6194 |
+
{"current_steps": 30890, "total_steps": 38150, "loss": 0.068, "lr": 5.317347776057685e-06, "epoch": 8.096985583224116, "percentage": 80.97, "elapsed_time": "0:51:12", "remaining_time": "0:12:02", "throughput": 2696.16, "total_tokens": 8283120}
|
| 6195 |
+
{"current_steps": 30895, "total_steps": 38150, "loss": 0.0998, "lr": 5.310298038617606e-06, "epoch": 8.09829619921363, "percentage": 80.98, "elapsed_time": "0:51:12", "remaining_time": "0:12:01", "throughput": 2696.21, "total_tokens": 8284496}
|
| 6196 |
+
{"current_steps": 30900, "total_steps": 38150, "loss": 0.1794, "lr": 5.303252422198115e-06, "epoch": 8.099606815203146, "percentage": 81.0, "elapsed_time": "0:51:13", "remaining_time": "0:12:01", "throughput": 2696.26, "total_tokens": 8285840}
|
| 6197 |
+
{"current_steps": 30905, "total_steps": 38150, "loss": 0.2137, "lr": 5.296210928273862e-06, "epoch": 8.100917431192661, "percentage": 81.01, "elapsed_time": "0:51:13", "remaining_time": "0:12:00", "throughput": 2696.27, "total_tokens": 8287008}
|
| 6198 |
+
{"current_steps": 30910, "total_steps": 38150, "loss": 0.179, "lr": 5.289173558318617e-06, "epoch": 8.102228047182175, "percentage": 81.02, "elapsed_time": "0:51:13", "remaining_time": "0:12:00", "throughput": 2696.31, "total_tokens": 8288416}
|
| 6199 |
+
{"current_steps": 30915, "total_steps": 38150, "loss": 0.0812, "lr": 5.282140313805284e-06, "epoch": 8.10353866317169, "percentage": 81.04, "elapsed_time": "0:51:14", "remaining_time": "0:11:59", "throughput": 2696.37, "total_tokens": 8289808}
|
| 6200 |
+
{"current_steps": 30920, "total_steps": 38150, "loss": 0.1078, "lr": 5.275111196205917e-06, "epoch": 8.104849279161206, "percentage": 81.05, "elapsed_time": "0:51:14", "remaining_time": "0:11:58", "throughput": 2696.43, "total_tokens": 8291184}
|
| 6201 |
+
{"current_steps": 30925, "total_steps": 38150, "loss": 0.1717, "lr": 5.268086206991693e-06, "epoch": 8.106159895150721, "percentage": 81.06, "elapsed_time": "0:51:15", "remaining_time": "0:11:58", "throughput": 2696.53, "total_tokens": 8292784}
|
| 6202 |
+
{"current_steps": 30930, "total_steps": 38150, "loss": 0.1445, "lr": 5.2610653476329345e-06, "epoch": 8.107470511140235, "percentage": 81.07, "elapsed_time": "0:51:15", "remaining_time": "0:11:57", "throughput": 2696.61, "total_tokens": 8294304}
|
| 6203 |
+
{"current_steps": 30935, "total_steps": 38150, "loss": 0.1072, "lr": 5.254048619599089e-06, "epoch": 8.10878112712975, "percentage": 81.09, "elapsed_time": "0:51:16", "remaining_time": "0:11:57", "throughput": 2696.63, "total_tokens": 8295488}
|
| 6204 |
+
{"current_steps": 30940, "total_steps": 38150, "loss": 0.1179, "lr": 5.247036024358759e-06, "epoch": 8.110091743119266, "percentage": 81.1, "elapsed_time": "0:51:16", "remaining_time": "0:11:56", "throughput": 2696.61, "total_tokens": 8296544}
|
| 6205 |
+
{"current_steps": 30945, "total_steps": 38150, "loss": 0.237, "lr": 5.240027563379649e-06, "epoch": 8.111402359108782, "percentage": 81.11, "elapsed_time": "0:51:17", "remaining_time": "0:11:56", "throughput": 2696.83, "total_tokens": 8299472}
|
| 6206 |
+
{"current_steps": 30950, "total_steps": 38150, "loss": 0.1362, "lr": 5.233023238128623e-06, "epoch": 8.112712975098296, "percentage": 81.13, "elapsed_time": "0:51:18", "remaining_time": "0:11:56", "throughput": 2697.12, "total_tokens": 8302656}
|
| 6207 |
+
{"current_steps": 30955, "total_steps": 38150, "loss": 0.1332, "lr": 5.226023050071682e-06, "epoch": 8.114023591087811, "percentage": 81.14, "elapsed_time": "0:51:18", "remaining_time": "0:11:55", "throughput": 2697.22, "total_tokens": 8304304}
|
| 6208 |
+
{"current_steps": 30960, "total_steps": 38150, "loss": 0.0718, "lr": 5.219027000673954e-06, "epoch": 8.115334207077327, "percentage": 81.15, "elapsed_time": "0:51:19", "remaining_time": "0:11:55", "throughput": 2697.2, "total_tokens": 8305344}
|
| 6209 |
+
{"current_steps": 30965, "total_steps": 38150, "loss": 0.1392, "lr": 5.212035091399694e-06, "epoch": 8.116644823066842, "percentage": 81.17, "elapsed_time": "0:51:19", "remaining_time": "0:11:54", "throughput": 2697.23, "total_tokens": 8306560}
|
| 6210 |
+
{"current_steps": 30970, "total_steps": 38150, "loss": 0.1469, "lr": 5.205047323712298e-06, "epoch": 8.117955439056356, "percentage": 81.18, "elapsed_time": "0:51:20", "remaining_time": "0:11:54", "throughput": 2697.26, "total_tokens": 8307888}
|
| 6211 |
+
{"current_steps": 30975, "total_steps": 38150, "loss": 0.127, "lr": 5.198063699074293e-06, "epoch": 8.119266055045872, "percentage": 81.19, "elapsed_time": "0:51:20", "remaining_time": "0:11:53", "throughput": 2697.35, "total_tokens": 8309424}
|
| 6212 |
+
{"current_steps": 30980, "total_steps": 38150, "loss": 0.117, "lr": 5.191084218947351e-06, "epoch": 8.120576671035387, "percentage": 81.21, "elapsed_time": "0:51:21", "remaining_time": "0:11:53", "throughput": 2697.39, "total_tokens": 8310752}
|
| 6213 |
+
{"current_steps": 30985, "total_steps": 38150, "loss": 0.0813, "lr": 5.184108884792244e-06, "epoch": 8.1218872870249, "percentage": 81.22, "elapsed_time": "0:51:21", "remaining_time": "0:11:52", "throughput": 2697.39, "total_tokens": 8311888}
|
| 6214 |
+
{"current_steps": 30990, "total_steps": 38150, "loss": 0.1972, "lr": 5.177137698068912e-06, "epoch": 8.123197903014416, "percentage": 81.23, "elapsed_time": "0:51:21", "remaining_time": "0:11:52", "throughput": 2697.35, "total_tokens": 8312896}
|
| 6215 |
+
{"current_steps": 30995, "total_steps": 38150, "loss": 0.1856, "lr": 5.170170660236406e-06, "epoch": 8.124508519003932, "percentage": 81.25, "elapsed_time": "0:51:22", "remaining_time": "0:11:51", "throughput": 2697.38, "total_tokens": 8314144}
|
| 6216 |
+
{"current_steps": 31000, "total_steps": 38150, "loss": 0.1081, "lr": 5.163207772752918e-06, "epoch": 8.125819134993447, "percentage": 81.26, "elapsed_time": "0:51:22", "remaining_time": "0:11:51", "throughput": 2697.35, "total_tokens": 8315168}
|
| 6217 |
+
{"current_steps": 31005, "total_steps": 38150, "loss": 0.2146, "lr": 5.156249037075761e-06, "epoch": 8.127129750982961, "percentage": 81.27, "elapsed_time": "0:51:23", "remaining_time": "0:11:50", "throughput": 2697.42, "total_tokens": 8316560}
|
| 6218 |
+
{"current_steps": 31010, "total_steps": 38150, "loss": 0.154, "lr": 5.149294454661399e-06, "epoch": 8.128440366972477, "percentage": 81.28, "elapsed_time": "0:51:23", "remaining_time": "0:11:49", "throughput": 2697.45, "total_tokens": 8317904}
|
| 6219 |
+
{"current_steps": 31015, "total_steps": 38150, "loss": 0.0472, "lr": 5.142344026965418e-06, "epoch": 8.129750982961992, "percentage": 81.3, "elapsed_time": "0:51:24", "remaining_time": "0:11:49", "throughput": 2697.43, "total_tokens": 8318960}
|
| 6220 |
+
{"current_steps": 31020, "total_steps": 38150, "loss": 0.1025, "lr": 5.135397755442512e-06, "epoch": 8.131061598951508, "percentage": 81.31, "elapsed_time": "0:51:24", "remaining_time": "0:11:48", "throughput": 2697.5, "total_tokens": 8320352}
|
| 6221 |
+
{"current_steps": 31025, "total_steps": 38150, "loss": 0.2823, "lr": 5.1284556415465315e-06, "epoch": 8.132372214941022, "percentage": 81.32, "elapsed_time": "0:51:24", "remaining_time": "0:11:48", "throughput": 2697.63, "total_tokens": 8322048}
|
| 6222 |
+
{"current_steps": 31030, "total_steps": 38150, "loss": 0.208, "lr": 5.121517686730451e-06, "epoch": 8.133682830930537, "percentage": 81.34, "elapsed_time": "0:51:25", "remaining_time": "0:11:47", "throughput": 2697.72, "total_tokens": 8323584}
|
| 6223 |
+
{"current_steps": 31035, "total_steps": 38150, "loss": 0.1153, "lr": 5.1145838924463725e-06, "epoch": 8.134993446920053, "percentage": 81.35, "elapsed_time": "0:51:25", "remaining_time": "0:11:47", "throughput": 2697.76, "total_tokens": 8324944}
|
| 6224 |
+
{"current_steps": 31040, "total_steps": 38150, "loss": 0.125, "lr": 5.107654260145523e-06, "epoch": 8.136304062909568, "percentage": 81.36, "elapsed_time": "0:51:26", "remaining_time": "0:11:46", "throughput": 2697.82, "total_tokens": 8326352}
|
| 6225 |
+
{"current_steps": 31045, "total_steps": 38150, "loss": 0.1457, "lr": 5.100728791278267e-06, "epoch": 8.137614678899082, "percentage": 81.38, "elapsed_time": "0:51:26", "remaining_time": "0:11:46", "throughput": 2697.84, "total_tokens": 8327664}
|
| 6226 |
+
{"current_steps": 31050, "total_steps": 38150, "loss": 0.1348, "lr": 5.0938074872940915e-06, "epoch": 8.138925294888598, "percentage": 81.39, "elapsed_time": "0:51:27", "remaining_time": "0:11:45", "throughput": 2697.84, "total_tokens": 8328848}
|
| 6227 |
+
{"current_steps": 31055, "total_steps": 38150, "loss": 0.1378, "lr": 5.086890349641616e-06, "epoch": 8.140235910878113, "percentage": 81.4, "elapsed_time": "0:51:27", "remaining_time": "0:11:45", "throughput": 2697.84, "total_tokens": 8329984}
|
| 6228 |
+
{"current_steps": 31060, "total_steps": 38150, "loss": 0.1435, "lr": 5.079977379768588e-06, "epoch": 8.141546526867629, "percentage": 81.42, "elapsed_time": "0:51:28", "remaining_time": "0:11:44", "throughput": 2697.85, "total_tokens": 8331168}
|
| 6229 |
+
{"current_steps": 31065, "total_steps": 38150, "loss": 0.1557, "lr": 5.073068579121862e-06, "epoch": 8.142857142857142, "percentage": 81.43, "elapsed_time": "0:51:28", "remaining_time": "0:11:44", "throughput": 2697.9, "total_tokens": 8332496}
|
| 6230 |
+
{"current_steps": 31070, "total_steps": 38150, "loss": 0.207, "lr": 5.066163949147457e-06, "epoch": 8.144167758846658, "percentage": 81.44, "elapsed_time": "0:51:28", "remaining_time": "0:11:43", "throughput": 2697.9, "total_tokens": 8333616}
|
| 6231 |
+
{"current_steps": 31075, "total_steps": 38150, "loss": 0.1421, "lr": 5.059263491290495e-06, "epoch": 8.145478374836173, "percentage": 81.45, "elapsed_time": "0:51:29", "remaining_time": "0:11:43", "throughput": 2697.95, "total_tokens": 8334928}
|
| 6232 |
+
{"current_steps": 31080, "total_steps": 38150, "loss": 0.1558, "lr": 5.052367206995229e-06, "epoch": 8.146788990825687, "percentage": 81.47, "elapsed_time": "0:51:29", "remaining_time": "0:11:42", "throughput": 2698.02, "total_tokens": 8336432}
|
| 6233 |
+
{"current_steps": 31085, "total_steps": 38150, "loss": 0.1112, "lr": 5.0454750977050406e-06, "epoch": 8.148099606815203, "percentage": 81.48, "elapsed_time": "0:51:30", "remaining_time": "0:11:42", "throughput": 2698.08, "total_tokens": 8337776}
|
| 6234 |
+
{"current_steps": 31090, "total_steps": 38150, "loss": 0.1499, "lr": 5.038587164862435e-06, "epoch": 8.149410222804718, "percentage": 81.49, "elapsed_time": "0:51:30", "remaining_time": "0:11:41", "throughput": 2698.17, "total_tokens": 8339264}
|
| 6235 |
+
{"current_steps": 31095, "total_steps": 38150, "loss": 0.0684, "lr": 5.031703409909052e-06, "epoch": 8.150720838794234, "percentage": 81.51, "elapsed_time": "0:51:31", "remaining_time": "0:11:41", "throughput": 2698.19, "total_tokens": 8340592}
|
| 6236 |
+
{"current_steps": 31100, "total_steps": 38150, "loss": 0.1416, "lr": 5.024823834285636e-06, "epoch": 8.152031454783748, "percentage": 81.52, "elapsed_time": "0:51:31", "remaining_time": "0:11:40", "throughput": 2698.19, "total_tokens": 8341712}
|
| 6237 |
+
{"current_steps": 31105, "total_steps": 38150, "loss": 0.1238, "lr": 5.017948439432077e-06, "epoch": 8.153342070773263, "percentage": 81.53, "elapsed_time": "0:51:32", "remaining_time": "0:11:40", "throughput": 2698.27, "total_tokens": 8343312}
|
| 6238 |
+
{"current_steps": 31110, "total_steps": 38150, "loss": 0.1455, "lr": 5.0110772267873836e-06, "epoch": 8.154652686762779, "percentage": 81.55, "elapsed_time": "0:51:32", "remaining_time": "0:11:39", "throughput": 2698.26, "total_tokens": 8344384}
|
| 6239 |
+
{"current_steps": 31115, "total_steps": 38150, "loss": 0.1211, "lr": 5.004210197789688e-06, "epoch": 8.155963302752294, "percentage": 81.56, "elapsed_time": "0:51:32", "remaining_time": "0:11:39", "throughput": 2698.26, "total_tokens": 8345536}
|
| 6240 |
+
{"current_steps": 31120, "total_steps": 38150, "loss": 0.1906, "lr": 4.997347353876242e-06, "epoch": 8.157273918741808, "percentage": 81.57, "elapsed_time": "0:51:33", "remaining_time": "0:11:38", "throughput": 2698.25, "total_tokens": 8346592}
|
| 6241 |
+
{"current_steps": 31125, "total_steps": 38150, "loss": 0.1141, "lr": 4.990488696483439e-06, "epoch": 8.158584534731324, "percentage": 81.59, "elapsed_time": "0:51:33", "remaining_time": "0:11:38", "throughput": 2698.22, "total_tokens": 8347616}
|
| 6242 |
+
{"current_steps": 31130, "total_steps": 38150, "loss": 0.2347, "lr": 4.983634227046785e-06, "epoch": 8.159895150720839, "percentage": 81.6, "elapsed_time": "0:51:34", "remaining_time": "0:11:37", "throughput": 2698.31, "total_tokens": 8349168}
|
| 6243 |
+
{"current_steps": 31135, "total_steps": 38150, "loss": 0.1345, "lr": 4.9767839470008935e-06, "epoch": 8.161205766710355, "percentage": 81.61, "elapsed_time": "0:51:34", "remaining_time": "0:11:37", "throughput": 2698.4, "total_tokens": 8350720}
|
| 6244 |
+
{"current_steps": 31140, "total_steps": 38150, "loss": 0.1697, "lr": 4.969937857779528e-06, "epoch": 8.162516382699868, "percentage": 81.63, "elapsed_time": "0:51:35", "remaining_time": "0:11:36", "throughput": 2698.48, "total_tokens": 8352208}
|
| 6245 |
+
{"current_steps": 31145, "total_steps": 38150, "loss": 0.1472, "lr": 4.963095960815556e-06, "epoch": 8.163826998689384, "percentage": 81.64, "elapsed_time": "0:51:35", "remaining_time": "0:11:36", "throughput": 2698.52, "total_tokens": 8353504}
|
| 6246 |
+
{"current_steps": 31150, "total_steps": 38150, "loss": 0.1708, "lr": 4.95625825754098e-06, "epoch": 8.1651376146789, "percentage": 81.65, "elapsed_time": "0:51:35", "remaining_time": "0:11:35", "throughput": 2698.5, "total_tokens": 8354560}
|
| 6247 |
+
{"current_steps": 31155, "total_steps": 38150, "loss": 0.1414, "lr": 4.9494247493869165e-06, "epoch": 8.166448230668415, "percentage": 81.66, "elapsed_time": "0:51:36", "remaining_time": "0:11:35", "throughput": 2698.53, "total_tokens": 8355808}
|
| 6248 |
+
{"current_steps": 31160, "total_steps": 38150, "loss": 0.1695, "lr": 4.942595437783609e-06, "epoch": 8.167758846657929, "percentage": 81.68, "elapsed_time": "0:51:36", "remaining_time": "0:11:34", "throughput": 2698.6, "total_tokens": 8357360}
|
| 6249 |
+
{"current_steps": 31165, "total_steps": 38150, "loss": 0.1385, "lr": 4.9357703241604185e-06, "epoch": 8.169069462647444, "percentage": 81.69, "elapsed_time": "0:51:37", "remaining_time": "0:11:34", "throughput": 2698.69, "total_tokens": 8358880}
|
| 6250 |
+
{"current_steps": 31170, "total_steps": 38150, "loss": 0.1699, "lr": 4.928949409945832e-06, "epoch": 8.17038007863696, "percentage": 81.7, "elapsed_time": "0:51:37", "remaining_time": "0:11:33", "throughput": 2698.75, "total_tokens": 8360496}
|
| 6251 |
+
{"current_steps": 31175, "total_steps": 38150, "loss": 0.2173, "lr": 4.922132696567464e-06, "epoch": 8.171690694626474, "percentage": 81.72, "elapsed_time": "0:51:38", "remaining_time": "0:11:33", "throughput": 2698.78, "total_tokens": 8361776}
|
| 6252 |
+
{"current_steps": 31180, "total_steps": 38150, "loss": 0.1353, "lr": 4.915320185452013e-06, "epoch": 8.17300131061599, "percentage": 81.73, "elapsed_time": "0:51:38", "remaining_time": "0:11:32", "throughput": 2698.78, "total_tokens": 8362928}
|
| 6253 |
+
{"current_steps": 31185, "total_steps": 38150, "loss": 0.0875, "lr": 4.9085118780253515e-06, "epoch": 8.174311926605505, "percentage": 81.74, "elapsed_time": "0:51:39", "remaining_time": "0:11:32", "throughput": 2698.79, "total_tokens": 8364176}
|
| 6254 |
+
{"current_steps": 31190, "total_steps": 38150, "loss": 0.1098, "lr": 4.9017077757124385e-06, "epoch": 8.17562254259502, "percentage": 81.76, "elapsed_time": "0:51:39", "remaining_time": "0:11:31", "throughput": 2698.81, "total_tokens": 8365376}
|
| 6255 |
+
{"current_steps": 31195, "total_steps": 38150, "loss": 0.1468, "lr": 4.89490787993736e-06, "epoch": 8.176933158584534, "percentage": 81.77, "elapsed_time": "0:51:40", "remaining_time": "0:11:31", "throughput": 2698.79, "total_tokens": 8366400}
|
| 6256 |
+
{"current_steps": 31200, "total_steps": 38150, "loss": 0.2877, "lr": 4.888112192123323e-06, "epoch": 8.17824377457405, "percentage": 81.78, "elapsed_time": "0:51:40", "remaining_time": "0:11:30", "throughput": 2698.85, "total_tokens": 8367792}
|
| 6257 |
+
{"current_steps": 31205, "total_steps": 38150, "loss": 0.2169, "lr": 4.8813207136926555e-06, "epoch": 8.179554390563565, "percentage": 81.8, "elapsed_time": "0:51:40", "remaining_time": "0:11:30", "throughput": 2698.93, "total_tokens": 8369232}
|
| 6258 |
+
{"current_steps": 31210, "total_steps": 38150, "loss": 0.197, "lr": 4.8745334460668005e-06, "epoch": 8.18086500655308, "percentage": 81.81, "elapsed_time": "0:51:41", "remaining_time": "0:11:29", "throughput": 2698.95, "total_tokens": 8370496}
|
| 6259 |
+
{"current_steps": 31215, "total_steps": 38150, "loss": 0.1037, "lr": 4.867750390666326e-06, "epoch": 8.182175622542594, "percentage": 81.82, "elapsed_time": "0:51:41", "remaining_time": "0:11:29", "throughput": 2698.92, "total_tokens": 8371488}
|
| 6260 |
+
{"current_steps": 31220, "total_steps": 38150, "loss": 0.1411, "lr": 4.860971548910903e-06, "epoch": 8.18348623853211, "percentage": 81.83, "elapsed_time": "0:51:42", "remaining_time": "0:11:28", "throughput": 2698.94, "total_tokens": 8372752}
|
| 6261 |
+
{"current_steps": 31225, "total_steps": 38150, "loss": 0.1504, "lr": 4.854196922219337e-06, "epoch": 8.184796854521625, "percentage": 81.85, "elapsed_time": "0:51:42", "remaining_time": "0:11:28", "throughput": 2699.0, "total_tokens": 8374160}
|
| 6262 |
+
{"current_steps": 31230, "total_steps": 38150, "loss": 0.0502, "lr": 4.847426512009545e-06, "epoch": 8.186107470511141, "percentage": 81.86, "elapsed_time": "0:51:43", "remaining_time": "0:11:27", "throughput": 2698.92, "total_tokens": 8375024}
|
| 6263 |
+
{"current_steps": 31235, "total_steps": 38150, "loss": 0.261, "lr": 4.840660319698559e-06, "epoch": 8.187418086500655, "percentage": 81.87, "elapsed_time": "0:51:43", "remaining_time": "0:11:27", "throughput": 2698.98, "total_tokens": 8376448}
|
| 6264 |
+
{"current_steps": 31240, "total_steps": 38150, "loss": 0.1473, "lr": 4.8338983467025394e-06, "epoch": 8.18872870249017, "percentage": 81.89, "elapsed_time": "0:51:44", "remaining_time": "0:11:26", "throughput": 2699.08, "total_tokens": 8378016}
|
| 6265 |
+
{"current_steps": 31245, "total_steps": 38150, "loss": 0.1194, "lr": 4.827140594436752e-06, "epoch": 8.190039318479686, "percentage": 81.9, "elapsed_time": "0:51:44", "remaining_time": "0:11:26", "throughput": 2699.07, "total_tokens": 8379136}
|
| 6266 |
+
{"current_steps": 31250, "total_steps": 38150, "loss": 0.194, "lr": 4.82038706431559e-06, "epoch": 8.191349934469201, "percentage": 81.91, "elapsed_time": "0:51:44", "remaining_time": "0:11:25", "throughput": 2699.16, "total_tokens": 8380688}
|
| 6267 |
+
{"current_steps": 31255, "total_steps": 38150, "loss": 0.0906, "lr": 4.81363775775254e-06, "epoch": 8.192660550458715, "percentage": 81.93, "elapsed_time": "0:51:45", "remaining_time": "0:11:25", "throughput": 2699.22, "total_tokens": 8382016}
|
| 6268 |
+
{"current_steps": 31260, "total_steps": 38150, "loss": 0.0897, "lr": 4.80689267616023e-06, "epoch": 8.19397116644823, "percentage": 81.94, "elapsed_time": "0:51:45", "remaining_time": "0:11:24", "throughput": 2699.18, "total_tokens": 8383056}
|
| 6269 |
+
{"current_steps": 31265, "total_steps": 38150, "loss": 0.1827, "lr": 4.800151820950391e-06, "epoch": 8.195281782437746, "percentage": 81.95, "elapsed_time": "0:51:46", "remaining_time": "0:11:24", "throughput": 2699.22, "total_tokens": 8384336}
|
| 6270 |
+
{"current_steps": 31270, "total_steps": 38150, "loss": 0.1608, "lr": 4.7934151935338706e-06, "epoch": 8.19659239842726, "percentage": 81.97, "elapsed_time": "0:51:46", "remaining_time": "0:11:23", "throughput": 2699.36, "total_tokens": 8386160}
|
| 6271 |
+
{"current_steps": 31275, "total_steps": 38150, "loss": 0.1553, "lr": 4.786682795320638e-06, "epoch": 8.197903014416775, "percentage": 81.98, "elapsed_time": "0:51:47", "remaining_time": "0:11:23", "throughput": 2699.41, "total_tokens": 8387600}
|
| 6272 |
+
{"current_steps": 31280, "total_steps": 38150, "loss": 0.1696, "lr": 4.779954627719771e-06, "epoch": 8.199213630406291, "percentage": 81.99, "elapsed_time": "0:51:47", "remaining_time": "0:11:22", "throughput": 2699.47, "total_tokens": 8389072}
|
| 6273 |
+
{"current_steps": 31285, "total_steps": 38150, "loss": 0.1912, "lr": 4.773230692139463e-06, "epoch": 8.200524246395807, "percentage": 82.01, "elapsed_time": "0:51:48", "remaining_time": "0:11:22", "throughput": 2699.49, "total_tokens": 8390336}
|
| 6274 |
+
{"current_steps": 31290, "total_steps": 38150, "loss": 0.0793, "lr": 4.766510989987022e-06, "epoch": 8.20183486238532, "percentage": 82.02, "elapsed_time": "0:51:48", "remaining_time": "0:11:21", "throughput": 2699.53, "total_tokens": 8391680}
|
| 6275 |
+
{"current_steps": 31295, "total_steps": 38150, "loss": 0.1805, "lr": 4.7597955226688666e-06, "epoch": 8.203145478374836, "percentage": 82.03, "elapsed_time": "0:51:48", "remaining_time": "0:11:21", "throughput": 2699.53, "total_tokens": 8392784}
|
| 6276 |
+
{"current_steps": 31300, "total_steps": 38150, "loss": 0.1504, "lr": 4.75308429159054e-06, "epoch": 8.204456094364351, "percentage": 82.04, "elapsed_time": "0:51:49", "remaining_time": "0:11:20", "throughput": 2699.6, "total_tokens": 8394288}
|
| 6277 |
+
{"current_steps": 31305, "total_steps": 38150, "loss": 0.1255, "lr": 4.746377298156685e-06, "epoch": 8.205766710353867, "percentage": 82.06, "elapsed_time": "0:51:49", "remaining_time": "0:11:19", "throughput": 2699.64, "total_tokens": 8395632}
|
| 6278 |
+
{"current_steps": 31310, "total_steps": 38150, "loss": 0.0812, "lr": 4.739674543771066e-06, "epoch": 8.20707732634338, "percentage": 82.07, "elapsed_time": "0:51:50", "remaining_time": "0:11:19", "throughput": 2699.66, "total_tokens": 8396816}
|
| 6279 |
+
{"current_steps": 31315, "total_steps": 38150, "loss": 0.1734, "lr": 4.732976029836561e-06, "epoch": 8.208387942332896, "percentage": 82.08, "elapsed_time": "0:51:50", "remaining_time": "0:11:18", "throughput": 2699.69, "total_tokens": 8398048}
|
| 6280 |
+
{"current_steps": 31320, "total_steps": 38150, "loss": 0.1486, "lr": 4.726281757755149e-06, "epoch": 8.209698558322412, "percentage": 82.1, "elapsed_time": "0:51:51", "remaining_time": "0:11:18", "throughput": 2699.72, "total_tokens": 8399408}
|
| 6281 |
+
{"current_steps": 31325, "total_steps": 38150, "loss": 0.1496, "lr": 4.719591728927938e-06, "epoch": 8.211009174311927, "percentage": 82.11, "elapsed_time": "0:51:51", "remaining_time": "0:11:17", "throughput": 2699.82, "total_tokens": 8400992}
|
| 6282 |
+
{"current_steps": 31330, "total_steps": 38150, "loss": 0.1623, "lr": 4.712905944755144e-06, "epoch": 8.212319790301441, "percentage": 82.12, "elapsed_time": "0:51:52", "remaining_time": "0:11:17", "throughput": 2699.82, "total_tokens": 8402208}
|
| 6283 |
+
{"current_steps": 31335, "total_steps": 38150, "loss": 0.0966, "lr": 4.706224406636072e-06, "epoch": 8.213630406290957, "percentage": 82.14, "elapsed_time": "0:51:52", "remaining_time": "0:11:16", "throughput": 2699.86, "total_tokens": 8403520}
|
| 6284 |
+
{"current_steps": 31340, "total_steps": 38150, "loss": 0.1328, "lr": 4.699547115969172e-06, "epoch": 8.214941022280472, "percentage": 82.15, "elapsed_time": "0:51:53", "remaining_time": "0:11:16", "throughput": 2699.94, "total_tokens": 8405040}
|
| 6285 |
+
{"current_steps": 31345, "total_steps": 38150, "loss": 0.1847, "lr": 4.692874074151979e-06, "epoch": 8.216251638269988, "percentage": 82.16, "elapsed_time": "0:51:53", "remaining_time": "0:11:15", "throughput": 2700.02, "total_tokens": 8406512}
|
| 6286 |
+
{"current_steps": 31350, "total_steps": 38150, "loss": 0.1573, "lr": 4.686205282581152e-06, "epoch": 8.217562254259501, "percentage": 82.18, "elapsed_time": "0:51:53", "remaining_time": "0:11:15", "throughput": 2700.0, "total_tokens": 8407584}
|
| 6287 |
+
{"current_steps": 31355, "total_steps": 38150, "loss": 0.1538, "lr": 4.679540742652463e-06, "epoch": 8.218872870249017, "percentage": 82.19, "elapsed_time": "0:51:54", "remaining_time": "0:11:14", "throughput": 2699.99, "total_tokens": 8408672}
|
| 6288 |
+
{"current_steps": 31360, "total_steps": 38150, "loss": 0.0985, "lr": 4.672880455760786e-06, "epoch": 8.220183486238533, "percentage": 82.2, "elapsed_time": "0:51:54", "remaining_time": "0:11:14", "throughput": 2699.97, "total_tokens": 8409712}
|
| 6289 |
+
{"current_steps": 31365, "total_steps": 38150, "loss": 0.2616, "lr": 4.666224423300114e-06, "epoch": 8.221494102228046, "percentage": 82.21, "elapsed_time": "0:51:55", "remaining_time": "0:11:13", "throughput": 2700.0, "total_tokens": 8411040}
|
| 6290 |
+
{"current_steps": 31370, "total_steps": 38150, "loss": 0.0784, "lr": 4.659572646663532e-06, "epoch": 8.222804718217562, "percentage": 82.23, "elapsed_time": "0:51:55", "remaining_time": "0:11:13", "throughput": 2700.06, "total_tokens": 8412448}
|
| 6291 |
+
{"current_steps": 31375, "total_steps": 38150, "loss": 0.1778, "lr": 4.652925127243246e-06, "epoch": 8.224115334207077, "percentage": 82.24, "elapsed_time": "0:51:56", "remaining_time": "0:11:12", "throughput": 2700.06, "total_tokens": 8413568}
|
| 6292 |
+
{"current_steps": 31380, "total_steps": 38150, "loss": 0.3368, "lr": 4.646281866430574e-06, "epoch": 8.225425950196593, "percentage": 82.25, "elapsed_time": "0:51:56", "remaining_time": "0:11:12", "throughput": 2700.03, "total_tokens": 8414688}
|
| 6293 |
+
{"current_steps": 31385, "total_steps": 38150, "loss": 0.2056, "lr": 4.6396428656159424e-06, "epoch": 8.226736566186107, "percentage": 82.27, "elapsed_time": "0:51:56", "remaining_time": "0:11:11", "throughput": 2700.12, "total_tokens": 8416160}
|
| 6294 |
+
{"current_steps": 31390, "total_steps": 38150, "loss": 0.2016, "lr": 4.633008126188876e-06, "epoch": 8.228047182175622, "percentage": 82.28, "elapsed_time": "0:51:57", "remaining_time": "0:11:11", "throughput": 2700.12, "total_tokens": 8417344}
|
| 6295 |
+
{"current_steps": 31395, "total_steps": 38150, "loss": 0.1814, "lr": 4.626377649538019e-06, "epoch": 8.229357798165138, "percentage": 82.29, "elapsed_time": "0:51:57", "remaining_time": "0:11:10", "throughput": 2700.14, "total_tokens": 8418528}
|
| 6296 |
+
{"current_steps": 31400, "total_steps": 38150, "loss": 0.0944, "lr": 4.619751437051114e-06, "epoch": 8.230668414154653, "percentage": 82.31, "elapsed_time": "0:51:58", "remaining_time": "0:11:10", "throughput": 2700.11, "total_tokens": 8419536}
|
| 6297 |
+
{"current_steps": 31405, "total_steps": 38150, "loss": 0.1188, "lr": 4.613129490115023e-06, "epoch": 8.231979030144167, "percentage": 82.32, "elapsed_time": "0:51:58", "remaining_time": "0:11:09", "throughput": 2700.14, "total_tokens": 8420800}
|
| 6298 |
+
{"current_steps": 31410, "total_steps": 38150, "loss": 0.1026, "lr": 4.6065118101157014e-06, "epoch": 8.233289646133683, "percentage": 82.33, "elapsed_time": "0:51:59", "remaining_time": "0:11:09", "throughput": 2700.15, "total_tokens": 8422016}
|
| 6299 |
+
{"current_steps": 31415, "total_steps": 38150, "loss": 0.1737, "lr": 4.599898398438221e-06, "epoch": 8.234600262123198, "percentage": 82.35, "elapsed_time": "0:51:59", "remaining_time": "0:11:08", "throughput": 2700.25, "total_tokens": 8423552}
|
| 6300 |
+
{"current_steps": 31420, "total_steps": 38150, "loss": 0.1903, "lr": 4.593289256466757e-06, "epoch": 8.235910878112714, "percentage": 82.36, "elapsed_time": "0:51:59", "remaining_time": "0:11:08", "throughput": 2700.33, "total_tokens": 8425008}
|
| 6301 |
+
{"current_steps": 31425, "total_steps": 38150, "loss": 0.0876, "lr": 4.586684385584592e-06, "epoch": 8.237221494102227, "percentage": 82.37, "elapsed_time": "0:52:00", "remaining_time": "0:11:07", "throughput": 2700.36, "total_tokens": 8426256}
|
| 6302 |
+
{"current_steps": 31430, "total_steps": 38150, "loss": 0.0831, "lr": 4.580083787174114e-06, "epoch": 8.238532110091743, "percentage": 82.39, "elapsed_time": "0:52:00", "remaining_time": "0:11:07", "throughput": 2700.46, "total_tokens": 8427952}
|
| 6303 |
+
{"current_steps": 31435, "total_steps": 38150, "loss": 0.1704, "lr": 4.573487462616815e-06, "epoch": 8.239842726081259, "percentage": 82.4, "elapsed_time": "0:52:01", "remaining_time": "0:11:06", "throughput": 2700.47, "total_tokens": 8429120}
|
| 6304 |
+
{"current_steps": 31440, "total_steps": 38150, "loss": 0.0408, "lr": 4.566895413293298e-06, "epoch": 8.241153342070774, "percentage": 82.41, "elapsed_time": "0:52:01", "remaining_time": "0:11:06", "throughput": 2700.4, "total_tokens": 8430032}
|
| 6305 |
+
{"current_steps": 31445, "total_steps": 38150, "loss": 0.1548, "lr": 4.560307640583264e-06, "epoch": 8.242463958060288, "percentage": 82.42, "elapsed_time": "0:52:02", "remaining_time": "0:11:05", "throughput": 2700.36, "total_tokens": 8431024}
|
| 6306 |
+
{"current_steps": 31450, "total_steps": 38150, "loss": 0.1543, "lr": 4.553724145865529e-06, "epoch": 8.243774574049803, "percentage": 82.44, "elapsed_time": "0:52:02", "remaining_time": "0:11:05", "throughput": 2700.36, "total_tokens": 8432160}
|
| 6307 |
+
{"current_steps": 31455, "total_steps": 38150, "loss": 0.1042, "lr": 4.547144930517996e-06, "epoch": 8.245085190039319, "percentage": 82.45, "elapsed_time": "0:52:03", "remaining_time": "0:11:04", "throughput": 2700.37, "total_tokens": 8433328}
|
| 6308 |
+
{"current_steps": 31460, "total_steps": 38150, "loss": 0.206, "lr": 4.54056999591769e-06, "epoch": 8.246395806028833, "percentage": 82.46, "elapsed_time": "0:52:03", "remaining_time": "0:11:04", "throughput": 2700.51, "total_tokens": 8435280}
|
| 6309 |
+
{"current_steps": 31465, "total_steps": 38150, "loss": 0.1354, "lr": 4.533999343440728e-06, "epoch": 8.247706422018348, "percentage": 82.48, "elapsed_time": "0:52:04", "remaining_time": "0:11:03", "throughput": 2700.55, "total_tokens": 8436688}
|
| 6310 |
+
{"current_steps": 31470, "total_steps": 38150, "loss": 0.0998, "lr": 4.527432974462345e-06, "epoch": 8.249017038007864, "percentage": 82.49, "elapsed_time": "0:52:04", "remaining_time": "0:11:03", "throughput": 2700.52, "total_tokens": 8437680}
|
| 6311 |
+
{"current_steps": 31475, "total_steps": 38150, "loss": 0.2157, "lr": 4.520870890356868e-06, "epoch": 8.25032765399738, "percentage": 82.5, "elapsed_time": "0:52:04", "remaining_time": "0:11:02", "throughput": 2700.53, "total_tokens": 8438864}
|
| 6312 |
+
{"current_steps": 31480, "total_steps": 38150, "loss": 0.18, "lr": 4.5143130924977265e-06, "epoch": 8.251638269986893, "percentage": 82.52, "elapsed_time": "0:52:05", "remaining_time": "0:11:02", "throughput": 2700.59, "total_tokens": 8440272}
|
| 6313 |
+
{"current_steps": 31485, "total_steps": 38150, "loss": 0.1905, "lr": 4.5077595822574645e-06, "epoch": 8.252948885976409, "percentage": 82.53, "elapsed_time": "0:52:05", "remaining_time": "0:11:01", "throughput": 2700.59, "total_tokens": 8441424}
|
| 6314 |
+
{"current_steps": 31490, "total_steps": 38150, "loss": 0.0991, "lr": 4.50121036100771e-06, "epoch": 8.254259501965924, "percentage": 82.54, "elapsed_time": "0:52:06", "remaining_time": "0:11:01", "throughput": 2700.63, "total_tokens": 8442704}
|
| 6315 |
+
{"current_steps": 31495, "total_steps": 38150, "loss": 0.1337, "lr": 4.494665430119208e-06, "epoch": 8.25557011795544, "percentage": 82.56, "elapsed_time": "0:52:06", "remaining_time": "0:11:00", "throughput": 2700.61, "total_tokens": 8443744}
|
| 6316 |
+
{"current_steps": 31500, "total_steps": 38150, "loss": 0.2403, "lr": 4.488124790961798e-06, "epoch": 8.256880733944953, "percentage": 82.57, "elapsed_time": "0:52:07", "remaining_time": "0:11:00", "throughput": 2700.73, "total_tokens": 8445648}
|
| 6317 |
+
{"current_steps": 31505, "total_steps": 38150, "loss": 0.1003, "lr": 4.4815884449044275e-06, "epoch": 8.258191349934469, "percentage": 82.58, "elapsed_time": "0:52:07", "remaining_time": "0:10:59", "throughput": 2700.79, "total_tokens": 8447072}
|
| 6318 |
+
{"current_steps": 31510, "total_steps": 38150, "loss": 0.1404, "lr": 4.475056393315144e-06, "epoch": 8.259501965923985, "percentage": 82.6, "elapsed_time": "0:52:08", "remaining_time": "0:10:59", "throughput": 2700.86, "total_tokens": 8448496}
|
| 6319 |
+
{"current_steps": 31515, "total_steps": 38150, "loss": 0.1585, "lr": 4.468528637561095e-06, "epoch": 8.2608125819135, "percentage": 82.61, "elapsed_time": "0:52:08", "remaining_time": "0:10:58", "throughput": 2701.0, "total_tokens": 8450336}
|
| 6320 |
+
{"current_steps": 31520, "total_steps": 38150, "loss": 0.1902, "lr": 4.462005179008525e-06, "epoch": 8.262123197903014, "percentage": 82.62, "elapsed_time": "0:52:09", "remaining_time": "0:10:58", "throughput": 2701.16, "total_tokens": 8452368}
|
| 6321 |
+
{"current_steps": 31525, "total_steps": 38150, "loss": 0.2145, "lr": 4.455486019022781e-06, "epoch": 8.26343381389253, "percentage": 82.63, "elapsed_time": "0:52:09", "remaining_time": "0:10:57", "throughput": 2701.23, "total_tokens": 8453760}
|
| 6322 |
+
{"current_steps": 31530, "total_steps": 38150, "loss": 0.1406, "lr": 4.448971158968318e-06, "epoch": 8.264744429882045, "percentage": 82.65, "elapsed_time": "0:52:10", "remaining_time": "0:10:57", "throughput": 2701.25, "total_tokens": 8455024}
|
| 6323 |
+
{"current_steps": 31535, "total_steps": 38150, "loss": 0.2506, "lr": 4.4424606002086814e-06, "epoch": 8.26605504587156, "percentage": 82.66, "elapsed_time": "0:52:10", "remaining_time": "0:10:56", "throughput": 2701.36, "total_tokens": 8456640}
|
| 6324 |
+
{"current_steps": 31540, "total_steps": 38150, "loss": 0.1753, "lr": 4.435954344106522e-06, "epoch": 8.267365661861074, "percentage": 82.67, "elapsed_time": "0:52:10", "remaining_time": "0:10:56", "throughput": 2701.4, "total_tokens": 8457920}
|
| 6325 |
+
{"current_steps": 31545, "total_steps": 38150, "loss": 0.0969, "lr": 4.429452392023584e-06, "epoch": 8.26867627785059, "percentage": 82.69, "elapsed_time": "0:52:11", "remaining_time": "0:10:55", "throughput": 2701.51, "total_tokens": 8459584}
|
| 6326 |
+
{"current_steps": 31550, "total_steps": 38150, "loss": 0.1775, "lr": 4.4229547453207175e-06, "epoch": 8.269986893840105, "percentage": 82.7, "elapsed_time": "0:52:11", "remaining_time": "0:10:55", "throughput": 2701.5, "total_tokens": 8460656}
|
| 6327 |
+
{"current_steps": 31555, "total_steps": 38150, "loss": 0.1707, "lr": 4.416461405357869e-06, "epoch": 8.271297509829619, "percentage": 82.71, "elapsed_time": "0:52:12", "remaining_time": "0:10:54", "throughput": 2701.55, "total_tokens": 8462064}
|
| 6328 |
+
{"current_steps": 31560, "total_steps": 38150, "loss": 0.1768, "lr": 4.409972373494084e-06, "epoch": 8.272608125819135, "percentage": 82.73, "elapsed_time": "0:52:12", "remaining_time": "0:10:54", "throughput": 2701.57, "total_tokens": 8463264}
|
| 6329 |
+
{"current_steps": 31565, "total_steps": 38150, "loss": 0.2432, "lr": 4.403487651087509e-06, "epoch": 8.27391874180865, "percentage": 82.74, "elapsed_time": "0:52:13", "remaining_time": "0:10:53", "throughput": 2701.61, "total_tokens": 8464560}
|
| 6330 |
+
{"current_steps": 31570, "total_steps": 38150, "loss": 0.113, "lr": 4.397007239495376e-06, "epoch": 8.275229357798166, "percentage": 82.75, "elapsed_time": "0:52:13", "remaining_time": "0:10:53", "throughput": 2701.63, "total_tokens": 8465744}
|
| 6331 |
+
{"current_steps": 31575, "total_steps": 38150, "loss": 0.1573, "lr": 4.390531140074028e-06, "epoch": 8.27653997378768, "percentage": 82.77, "elapsed_time": "0:52:14", "remaining_time": "0:10:52", "throughput": 2701.74, "total_tokens": 8467376}
|
| 6332 |
+
{"current_steps": 31580, "total_steps": 38150, "loss": 0.1509, "lr": 4.384059354178893e-06, "epoch": 8.277850589777195, "percentage": 82.78, "elapsed_time": "0:52:14", "remaining_time": "0:10:52", "throughput": 2701.72, "total_tokens": 8468432}
|
| 6333 |
+
{"current_steps": 31585, "total_steps": 38150, "loss": 0.1246, "lr": 4.377591883164522e-06, "epoch": 8.27916120576671, "percentage": 82.79, "elapsed_time": "0:52:14", "remaining_time": "0:10:51", "throughput": 2701.73, "total_tokens": 8469680}
|
| 6334 |
+
{"current_steps": 31590, "total_steps": 38150, "loss": 0.2396, "lr": 4.371128728384538e-06, "epoch": 8.280471821756226, "percentage": 82.8, "elapsed_time": "0:52:15", "remaining_time": "0:10:51", "throughput": 2701.76, "total_tokens": 8471008}
|
| 6335 |
+
{"current_steps": 31595, "total_steps": 38150, "loss": 0.1392, "lr": 4.364669891191667e-06, "epoch": 8.28178243774574, "percentage": 82.82, "elapsed_time": "0:52:15", "remaining_time": "0:10:50", "throughput": 2701.83, "total_tokens": 8472400}
|
| 6336 |
+
{"current_steps": 31600, "total_steps": 38150, "loss": 0.1142, "lr": 4.358215372937738e-06, "epoch": 8.283093053735255, "percentage": 82.83, "elapsed_time": "0:52:16", "remaining_time": "0:10:50", "throughput": 2701.99, "total_tokens": 8474368}
|
| 6337 |
+
{"current_steps": 31605, "total_steps": 38150, "loss": 0.0853, "lr": 4.35176517497366e-06, "epoch": 8.284403669724771, "percentage": 82.84, "elapsed_time": "0:52:16", "remaining_time": "0:10:49", "throughput": 2701.97, "total_tokens": 8475424}
|
| 6338 |
+
{"current_steps": 31610, "total_steps": 38150, "loss": 0.2617, "lr": 4.34531929864945e-06, "epoch": 8.285714285714286, "percentage": 82.86, "elapsed_time": "0:52:17", "remaining_time": "0:10:49", "throughput": 2702.09, "total_tokens": 8477120}
|
| 6339 |
+
{"current_steps": 31615, "total_steps": 38150, "loss": 0.1942, "lr": 4.3388777453142265e-06, "epoch": 8.2870249017038, "percentage": 82.87, "elapsed_time": "0:52:17", "remaining_time": "0:10:48", "throughput": 2702.15, "total_tokens": 8478528}
|
| 6340 |
+
{"current_steps": 31620, "total_steps": 38150, "loss": 0.1098, "lr": 4.332440516316188e-06, "epoch": 8.288335517693316, "percentage": 82.88, "elapsed_time": "0:52:18", "remaining_time": "0:10:48", "throughput": 2702.21, "total_tokens": 8480032}
|
| 6341 |
+
{"current_steps": 31625, "total_steps": 38150, "loss": 0.1647, "lr": 4.326007613002642e-06, "epoch": 8.289646133682831, "percentage": 82.9, "elapsed_time": "0:52:18", "remaining_time": "0:10:47", "throughput": 2702.24, "total_tokens": 8481280}
|
| 6342 |
+
{"current_steps": 31630, "total_steps": 38150, "loss": 0.1876, "lr": 4.319579036719979e-06, "epoch": 8.290956749672347, "percentage": 82.91, "elapsed_time": "0:52:19", "remaining_time": "0:10:47", "throughput": 2702.36, "total_tokens": 8483040}
|
| 6343 |
+
{"current_steps": 31635, "total_steps": 38150, "loss": 0.1577, "lr": 4.313154788813684e-06, "epoch": 8.29226736566186, "percentage": 82.92, "elapsed_time": "0:52:19", "remaining_time": "0:10:46", "throughput": 2702.33, "total_tokens": 8484016}
|
| 6344 |
+
{"current_steps": 31640, "total_steps": 38150, "loss": 0.1815, "lr": 4.30673487062836e-06, "epoch": 8.293577981651376, "percentage": 82.94, "elapsed_time": "0:52:19", "remaining_time": "0:10:46", "throughput": 2702.27, "total_tokens": 8484960}
|
| 6345 |
+
{"current_steps": 31645, "total_steps": 38150, "loss": 0.1609, "lr": 4.300319283507667e-06, "epoch": 8.294888597640892, "percentage": 82.95, "elapsed_time": "0:52:20", "remaining_time": "0:10:45", "throughput": 2702.27, "total_tokens": 8486112}
|
| 6346 |
+
{"current_steps": 31650, "total_steps": 38150, "loss": 0.1508, "lr": 4.29390802879438e-06, "epoch": 8.296199213630405, "percentage": 82.96, "elapsed_time": "0:52:20", "remaining_time": "0:10:45", "throughput": 2702.23, "total_tokens": 8487072}
|
| 6347 |
+
{"current_steps": 31655, "total_steps": 38150, "loss": 0.2161, "lr": 4.287501107830366e-06, "epoch": 8.297509829619921, "percentage": 82.98, "elapsed_time": "0:52:21", "remaining_time": "0:10:44", "throughput": 2702.3, "total_tokens": 8488592}
|
| 6348 |
+
{"current_steps": 31660, "total_steps": 38150, "loss": 0.2042, "lr": 4.2810985219565835e-06, "epoch": 8.298820445609437, "percentage": 82.99, "elapsed_time": "0:52:21", "remaining_time": "0:10:44", "throughput": 2702.38, "total_tokens": 8490064}
|
| 6349 |
+
{"current_steps": 31665, "total_steps": 38150, "loss": 0.1326, "lr": 4.27470027251308e-06, "epoch": 8.300131061598952, "percentage": 83.0, "elapsed_time": "0:52:22", "remaining_time": "0:10:43", "throughput": 2702.47, "total_tokens": 8491648}
|
| 6350 |
+
{"current_steps": 31670, "total_steps": 38150, "loss": 0.1629, "lr": 4.268306360838998e-06, "epoch": 8.301441677588466, "percentage": 83.01, "elapsed_time": "0:52:22", "remaining_time": "0:10:43", "throughput": 2702.44, "total_tokens": 8492672}
|
| 6351 |
+
{"current_steps": 31675, "total_steps": 38150, "loss": 0.3437, "lr": 4.261916788272571e-06, "epoch": 8.302752293577981, "percentage": 83.03, "elapsed_time": "0:52:23", "remaining_time": "0:10:42", "throughput": 2702.48, "total_tokens": 8494080}
|
| 6352 |
+
{"current_steps": 31680, "total_steps": 38150, "loss": 0.221, "lr": 4.2555315561511326e-06, "epoch": 8.304062909567497, "percentage": 83.04, "elapsed_time": "0:52:23", "remaining_time": "0:10:42", "throughput": 2702.55, "total_tokens": 8495568}
|
| 6353 |
+
{"current_steps": 31685, "total_steps": 38150, "loss": 0.0972, "lr": 4.249150665811097e-06, "epoch": 8.305373525557012, "percentage": 83.05, "elapsed_time": "0:52:23", "remaining_time": "0:10:41", "throughput": 2702.56, "total_tokens": 8496768}
|
| 6354 |
+
{"current_steps": 31690, "total_steps": 38150, "loss": 0.1291, "lr": 4.242774118587969e-06, "epoch": 8.306684141546526, "percentage": 83.07, "elapsed_time": "0:52:24", "remaining_time": "0:10:40", "throughput": 2702.65, "total_tokens": 8498384}
|
| 6355 |
+
{"current_steps": 31695, "total_steps": 38150, "loss": 0.2561, "lr": 4.236401915816346e-06, "epoch": 8.307994757536042, "percentage": 83.08, "elapsed_time": "0:52:24", "remaining_time": "0:10:40", "throughput": 2702.71, "total_tokens": 8499872}
|
| 6356 |
+
{"current_steps": 31700, "total_steps": 38150, "loss": 0.0785, "lr": 4.230034058829932e-06, "epoch": 8.309305373525557, "percentage": 83.09, "elapsed_time": "0:52:25", "remaining_time": "0:10:39", "throughput": 2702.69, "total_tokens": 8500944}
|
| 6357 |
+
{"current_steps": 31705, "total_steps": 38150, "loss": 0.1187, "lr": 4.2236705489615e-06, "epoch": 8.310615989515073, "percentage": 83.11, "elapsed_time": "0:52:25", "remaining_time": "0:10:39", "throughput": 2702.71, "total_tokens": 8502224}
|
| 6358 |
+
{"current_steps": 31710, "total_steps": 38150, "loss": 0.0898, "lr": 4.2173113875429246e-06, "epoch": 8.311926605504587, "percentage": 83.12, "elapsed_time": "0:52:26", "remaining_time": "0:10:38", "throughput": 2702.69, "total_tokens": 8503312}
|
| 6359 |
+
{"current_steps": 31715, "total_steps": 38150, "loss": 0.1378, "lr": 4.2109565759051645e-06, "epoch": 8.313237221494102, "percentage": 83.13, "elapsed_time": "0:52:26", "remaining_time": "0:10:38", "throughput": 2702.75, "total_tokens": 8504752}
|
| 6360 |
+
{"current_steps": 31720, "total_steps": 38150, "loss": 0.1512, "lr": 4.204606115378282e-06, "epoch": 8.314547837483618, "percentage": 83.15, "elapsed_time": "0:52:27", "remaining_time": "0:10:37", "throughput": 2702.8, "total_tokens": 8506160}
|
| 6361 |
+
{"current_steps": 31725, "total_steps": 38150, "loss": 0.3664, "lr": 4.198260007291399e-06, "epoch": 8.315858453473133, "percentage": 83.16, "elapsed_time": "0:52:27", "remaining_time": "0:10:37", "throughput": 2702.88, "total_tokens": 8507616}
|
| 6362 |
+
{"current_steps": 31730, "total_steps": 38150, "loss": 0.0975, "lr": 4.191918252972754e-06, "epoch": 8.317169069462647, "percentage": 83.17, "elapsed_time": "0:52:28", "remaining_time": "0:10:36", "throughput": 2702.89, "total_tokens": 8508784}
|
| 6363 |
+
{"current_steps": 31735, "total_steps": 38150, "loss": 0.1464, "lr": 4.185580853749666e-06, "epoch": 8.318479685452163, "percentage": 83.18, "elapsed_time": "0:52:28", "remaining_time": "0:10:36", "throughput": 2702.93, "total_tokens": 8510080}
|
| 6364 |
+
{"current_steps": 31740, "total_steps": 38150, "loss": 0.1281, "lr": 4.179247810948539e-06, "epoch": 8.319790301441678, "percentage": 83.2, "elapsed_time": "0:52:28", "remaining_time": "0:10:35", "throughput": 2703.01, "total_tokens": 8511488}
|
| 6365 |
+
{"current_steps": 31745, "total_steps": 38150, "loss": 0.2368, "lr": 4.172919125894869e-06, "epoch": 8.321100917431192, "percentage": 83.21, "elapsed_time": "0:52:29", "remaining_time": "0:10:35", "throughput": 2703.07, "total_tokens": 8512944}
|
| 6366 |
+
{"current_steps": 31750, "total_steps": 38150, "loss": 0.1268, "lr": 4.166594799913237e-06, "epoch": 8.322411533420707, "percentage": 83.22, "elapsed_time": "0:52:29", "remaining_time": "0:10:34", "throughput": 2703.09, "total_tokens": 8514128}
|
| 6367 |
+
{"current_steps": 31755, "total_steps": 38150, "loss": 0.1375, "lr": 4.160274834327327e-06, "epoch": 8.323722149410223, "percentage": 83.24, "elapsed_time": "0:52:30", "remaining_time": "0:10:34", "throughput": 2703.18, "total_tokens": 8515760}
|
| 6368 |
+
{"current_steps": 31760, "total_steps": 38150, "loss": 0.2065, "lr": 4.153959230459878e-06, "epoch": 8.325032765399738, "percentage": 83.25, "elapsed_time": "0:52:30", "remaining_time": "0:10:33", "throughput": 2703.24, "total_tokens": 8517104}
|
| 6369 |
+
{"current_steps": 31765, "total_steps": 38150, "loss": 0.1783, "lr": 4.147647989632744e-06, "epoch": 8.326343381389252, "percentage": 83.26, "elapsed_time": "0:52:31", "remaining_time": "0:10:33", "throughput": 2703.25, "total_tokens": 8518256}
|
| 6370 |
+
{"current_steps": 31770, "total_steps": 38150, "loss": 0.2465, "lr": 4.141341113166855e-06, "epoch": 8.327653997378768, "percentage": 83.28, "elapsed_time": "0:52:31", "remaining_time": "0:10:32", "throughput": 2703.28, "total_tokens": 8519600}
|
| 6371 |
+
{"current_steps": 31775, "total_steps": 38150, "loss": 0.1463, "lr": 4.135038602382232e-06, "epoch": 8.328964613368283, "percentage": 83.29, "elapsed_time": "0:52:32", "remaining_time": "0:10:32", "throughput": 2703.31, "total_tokens": 8520944}
|
| 6372 |
+
{"current_steps": 31780, "total_steps": 38150, "loss": 0.1057, "lr": 4.1287404585979764e-06, "epoch": 8.330275229357799, "percentage": 83.3, "elapsed_time": "0:52:32", "remaining_time": "0:10:31", "throughput": 2703.33, "total_tokens": 8522160}
|
| 6373 |
+
{"current_steps": 31785, "total_steps": 38150, "loss": 0.1734, "lr": 4.122446683132281e-06, "epoch": 8.331585845347313, "percentage": 83.32, "elapsed_time": "0:52:32", "remaining_time": "0:10:31", "throughput": 2703.42, "total_tokens": 8523808}
|
| 6374 |
+
{"current_steps": 31790, "total_steps": 38150, "loss": 0.4625, "lr": 4.116157277302418e-06, "epoch": 8.332896461336828, "percentage": 83.33, "elapsed_time": "0:52:33", "remaining_time": "0:10:30", "throughput": 2703.55, "total_tokens": 8525616}
|
| 6375 |
+
{"current_steps": 31795, "total_steps": 38150, "loss": 0.181, "lr": 4.109872242424756e-06, "epoch": 8.334207077326344, "percentage": 83.34, "elapsed_time": "0:52:33", "remaining_time": "0:10:30", "throughput": 2703.66, "total_tokens": 8527200}
|
| 6376 |
+
{"current_steps": 31800, "total_steps": 38150, "loss": 0.1953, "lr": 4.1035915798147415e-06, "epoch": 8.33551769331586, "percentage": 83.36, "elapsed_time": "0:52:34", "remaining_time": "0:10:29", "throughput": 2703.72, "total_tokens": 8528720}
|
| 6377 |
+
{"current_steps": 31805, "total_steps": 38150, "loss": 0.1241, "lr": 4.097315290786899e-06, "epoch": 8.336828309305373, "percentage": 83.37, "elapsed_time": "0:52:34", "remaining_time": "0:10:29", "throughput": 2703.73, "total_tokens": 8529888}
|
| 6378 |
+
{"current_steps": 31810, "total_steps": 38150, "loss": 0.1166, "lr": 4.091043376654843e-06, "epoch": 8.338138925294889, "percentage": 83.38, "elapsed_time": "0:52:35", "remaining_time": "0:10:28", "throughput": 2703.8, "total_tokens": 8531360}
|
| 6379 |
+
{"current_steps": 31815, "total_steps": 38150, "loss": 0.0758, "lr": 4.084775838731286e-06, "epoch": 8.339449541284404, "percentage": 83.39, "elapsed_time": "0:52:35", "remaining_time": "0:10:28", "throughput": 2703.83, "total_tokens": 8532656}
|
| 6380 |
+
{"current_steps": 31820, "total_steps": 38150, "loss": 0.1562, "lr": 4.078512678328009e-06, "epoch": 8.34076015727392, "percentage": 83.41, "elapsed_time": "0:52:36", "remaining_time": "0:10:27", "throughput": 2703.81, "total_tokens": 8533712}
|
| 6381 |
+
{"current_steps": 31825, "total_steps": 38150, "loss": 0.116, "lr": 4.072253896755876e-06, "epoch": 8.342070773263433, "percentage": 83.42, "elapsed_time": "0:52:36", "remaining_time": "0:10:27", "throughput": 2703.86, "total_tokens": 8535008}
|
| 6382 |
+
{"current_steps": 31830, "total_steps": 38150, "loss": 0.2383, "lr": 4.065999495324843e-06, "epoch": 8.343381389252949, "percentage": 83.43, "elapsed_time": "0:52:37", "remaining_time": "0:10:26", "throughput": 2703.88, "total_tokens": 8536256}
|
| 6383 |
+
{"current_steps": 31835, "total_steps": 38150, "loss": 0.1285, "lr": 4.059749475343949e-06, "epoch": 8.344692005242464, "percentage": 83.45, "elapsed_time": "0:52:37", "remaining_time": "0:10:26", "throughput": 2703.94, "total_tokens": 8537680}
|
| 6384 |
+
{"current_steps": 31840, "total_steps": 38150, "loss": 0.1286, "lr": 4.053503838121306e-06, "epoch": 8.346002621231978, "percentage": 83.46, "elapsed_time": "0:52:37", "remaining_time": "0:10:25", "throughput": 2703.99, "total_tokens": 8539072}
|
| 6385 |
+
{"current_steps": 31845, "total_steps": 38150, "loss": 0.2023, "lr": 4.047262584964112e-06, "epoch": 8.347313237221494, "percentage": 83.47, "elapsed_time": "0:52:38", "remaining_time": "0:10:25", "throughput": 2704.07, "total_tokens": 8540576}
|
| 6386 |
+
{"current_steps": 31850, "total_steps": 38150, "loss": 0.2138, "lr": 4.0410257171786595e-06, "epoch": 8.34862385321101, "percentage": 83.49, "elapsed_time": "0:52:38", "remaining_time": "0:10:24", "throughput": 2704.17, "total_tokens": 8542176}
|
| 6387 |
+
{"current_steps": 31855, "total_steps": 38150, "loss": 0.2396, "lr": 4.034793236070308e-06, "epoch": 8.349934469200525, "percentage": 83.5, "elapsed_time": "0:52:39", "remaining_time": "0:10:24", "throughput": 2704.25, "total_tokens": 8543600}
|
| 6388 |
+
{"current_steps": 31860, "total_steps": 38150, "loss": 0.1505, "lr": 4.028565142943508e-06, "epoch": 8.351245085190039, "percentage": 83.51, "elapsed_time": "0:52:39", "remaining_time": "0:10:23", "throughput": 2704.32, "total_tokens": 8545072}
|
| 6389 |
+
{"current_steps": 31865, "total_steps": 38150, "loss": 0.1033, "lr": 4.022341439101782e-06, "epoch": 8.352555701179554, "percentage": 83.53, "elapsed_time": "0:52:40", "remaining_time": "0:10:23", "throughput": 2704.25, "total_tokens": 8545952}
|
| 6390 |
+
{"current_steps": 31870, "total_steps": 38150, "loss": 0.1265, "lr": 4.016122125847751e-06, "epoch": 8.35386631716907, "percentage": 83.54, "elapsed_time": "0:52:40", "remaining_time": "0:10:22", "throughput": 2704.28, "total_tokens": 8547168}
|
| 6391 |
+
{"current_steps": 31875, "total_steps": 38150, "loss": 0.1137, "lr": 4.009907204483107e-06, "epoch": 8.355176933158585, "percentage": 83.55, "elapsed_time": "0:52:41", "remaining_time": "0:10:22", "throughput": 2704.32, "total_tokens": 8548480}
|
| 6392 |
+
{"current_steps": 31880, "total_steps": 38150, "loss": 0.1482, "lr": 4.003696676308613e-06, "epoch": 8.356487549148099, "percentage": 83.56, "elapsed_time": "0:52:41", "remaining_time": "0:10:21", "throughput": 2704.4, "total_tokens": 8549904}
|
| 6393 |
+
{"current_steps": 31885, "total_steps": 38150, "loss": 0.0936, "lr": 3.997490542624124e-06, "epoch": 8.357798165137615, "percentage": 83.58, "elapsed_time": "0:52:41", "remaining_time": "0:10:21", "throughput": 2704.35, "total_tokens": 8550896}
|
| 6394 |
+
{"current_steps": 31890, "total_steps": 38150, "loss": 0.1611, "lr": 3.991288804728577e-06, "epoch": 8.35910878112713, "percentage": 83.59, "elapsed_time": "0:52:42", "remaining_time": "0:10:20", "throughput": 2704.37, "total_tokens": 8552096}
|
| 6395 |
+
{"current_steps": 31895, "total_steps": 38150, "loss": 0.1769, "lr": 3.985091463919982e-06, "epoch": 8.360419397116646, "percentage": 83.6, "elapsed_time": "0:52:42", "remaining_time": "0:10:20", "throughput": 2704.46, "total_tokens": 8553680}
|
| 6396 |
+
{"current_steps": 31900, "total_steps": 38150, "loss": 0.2379, "lr": 3.978898521495433e-06, "epoch": 8.36173001310616, "percentage": 83.62, "elapsed_time": "0:52:43", "remaining_time": "0:10:19", "throughput": 2704.72, "total_tokens": 8556080}
|
| 6397 |
+
{"current_steps": 31905, "total_steps": 38150, "loss": 0.1595, "lr": 3.972709978751099e-06, "epoch": 8.363040629095675, "percentage": 83.63, "elapsed_time": "0:52:43", "remaining_time": "0:10:19", "throughput": 2704.74, "total_tokens": 8557280}
|
| 6398 |
+
{"current_steps": 31910, "total_steps": 38150, "loss": 0.206, "lr": 3.966525836982237e-06, "epoch": 8.36435124508519, "percentage": 83.64, "elapsed_time": "0:52:44", "remaining_time": "0:10:18", "throughput": 2704.76, "total_tokens": 8558496}
|
| 6399 |
+
{"current_steps": 31915, "total_steps": 38150, "loss": 0.1829, "lr": 3.960346097483175e-06, "epoch": 8.365661861074706, "percentage": 83.66, "elapsed_time": "0:52:44", "remaining_time": "0:10:18", "throughput": 2704.83, "total_tokens": 8560000}
|
| 6400 |
+
{"current_steps": 31920, "total_steps": 38150, "loss": 0.1046, "lr": 3.954170761547324e-06, "epoch": 8.36697247706422, "percentage": 83.67, "elapsed_time": "0:52:45", "remaining_time": "0:10:17", "throughput": 2704.84, "total_tokens": 8561120}
|
| 6401 |
+
{"current_steps": 31925, "total_steps": 38150, "loss": 0.1494, "lr": 3.947999830467159e-06, "epoch": 8.368283093053735, "percentage": 83.68, "elapsed_time": "0:52:45", "remaining_time": "0:10:17", "throughput": 2704.78, "total_tokens": 8562048}
|
| 6402 |
+
{"current_steps": 31930, "total_steps": 38150, "loss": 0.0888, "lr": 3.941833305534259e-06, "epoch": 8.36959370904325, "percentage": 83.7, "elapsed_time": "0:52:45", "remaining_time": "0:10:16", "throughput": 2704.73, "total_tokens": 8562992}
|
| 6403 |
+
{"current_steps": 31935, "total_steps": 38150, "loss": 0.1643, "lr": 3.935671188039264e-06, "epoch": 8.370904325032765, "percentage": 83.71, "elapsed_time": "0:52:46", "remaining_time": "0:10:16", "throughput": 2704.89, "total_tokens": 8564816}
|
| 6404 |
+
{"current_steps": 31940, "total_steps": 38150, "loss": 0.1783, "lr": 3.9295134792718905e-06, "epoch": 8.37221494102228, "percentage": 83.72, "elapsed_time": "0:52:46", "remaining_time": "0:10:15", "throughput": 2705.0, "total_tokens": 8566464}
|
| 6405 |
+
{"current_steps": 31945, "total_steps": 38150, "loss": 0.1587, "lr": 3.923360180520941e-06, "epoch": 8.373525557011796, "percentage": 83.74, "elapsed_time": "0:52:47", "remaining_time": "0:10:15", "throughput": 2705.03, "total_tokens": 8567744}
|
| 6406 |
+
{"current_steps": 31950, "total_steps": 38150, "loss": 0.3205, "lr": 3.917211293074285e-06, "epoch": 8.374836173001311, "percentage": 83.75, "elapsed_time": "0:52:47", "remaining_time": "0:10:14", "throughput": 2705.11, "total_tokens": 8569280}
|
| 6407 |
+
{"current_steps": 31955, "total_steps": 38150, "loss": 0.2084, "lr": 3.911066818218886e-06, "epoch": 8.376146788990825, "percentage": 83.76, "elapsed_time": "0:52:48", "remaining_time": "0:10:14", "throughput": 2705.06, "total_tokens": 8570256}
|
| 6408 |
+
{"current_steps": 31960, "total_steps": 38150, "loss": 0.1865, "lr": 3.9049267572407565e-06, "epoch": 8.37745740498034, "percentage": 83.77, "elapsed_time": "0:52:48", "remaining_time": "0:10:13", "throughput": 2705.28, "total_tokens": 8572704}
|
| 6409 |
+
{"current_steps": 31965, "total_steps": 38150, "loss": 0.1527, "lr": 3.898791111425007e-06, "epoch": 8.378768020969856, "percentage": 83.79, "elapsed_time": "0:52:49", "remaining_time": "0:10:13", "throughput": 2705.33, "total_tokens": 8574016}
|
| 6410 |
+
{"current_steps": 31970, "total_steps": 38150, "loss": 0.1975, "lr": 3.892659882055819e-06, "epoch": 8.380078636959372, "percentage": 83.8, "elapsed_time": "0:52:49", "remaining_time": "0:10:12", "throughput": 2705.44, "total_tokens": 8575696}
|
| 6411 |
+
{"current_steps": 31975, "total_steps": 38150, "loss": 0.1775, "lr": 3.886533070416445e-06, "epoch": 8.381389252948885, "percentage": 83.81, "elapsed_time": "0:52:50", "remaining_time": "0:10:12", "throughput": 2705.53, "total_tokens": 8577248}
|
| 6412 |
+
{"current_steps": 31980, "total_steps": 38150, "loss": 0.1455, "lr": 3.880410677789214e-06, "epoch": 8.382699868938401, "percentage": 83.83, "elapsed_time": "0:52:50", "remaining_time": "0:10:11", "throughput": 2705.48, "total_tokens": 8578192}
|
| 6413 |
+
{"current_steps": 31985, "total_steps": 38150, "loss": 0.0697, "lr": 3.87429270545554e-06, "epoch": 8.384010484927916, "percentage": 83.84, "elapsed_time": "0:52:51", "remaining_time": "0:10:11", "throughput": 2705.49, "total_tokens": 8579488}
|
| 6414 |
+
{"current_steps": 31990, "total_steps": 38150, "loss": 0.0805, "lr": 3.868179154695909e-06, "epoch": 8.385321100917432, "percentage": 83.85, "elapsed_time": "0:52:51", "remaining_time": "0:10:10", "throughput": 2705.44, "total_tokens": 8580432}
|
| 6415 |
+
{"current_steps": 31995, "total_steps": 38150, "loss": 0.2514, "lr": 3.86207002678986e-06, "epoch": 8.386631716906946, "percentage": 83.87, "elapsed_time": "0:52:52", "remaining_time": "0:10:10", "throughput": 2705.55, "total_tokens": 8582064}
|
| 6416 |
+
{"current_steps": 32000, "total_steps": 38150, "loss": 0.2064, "lr": 3.855965323016031e-06, "epoch": 8.387942332896461, "percentage": 83.88, "elapsed_time": "0:52:52", "remaining_time": "0:10:09", "throughput": 2705.59, "total_tokens": 8583424}
|
| 6417 |
+
{"current_steps": 32005, "total_steps": 38150, "loss": 0.2078, "lr": 3.8498650446521264e-06, "epoch": 8.389252948885977, "percentage": 83.89, "elapsed_time": "0:52:52", "remaining_time": "0:10:09", "throughput": 2705.68, "total_tokens": 8584992}
|
| 6418 |
+
{"current_steps": 32010, "total_steps": 38150, "loss": 0.116, "lr": 3.843769192974925e-06, "epoch": 8.390563564875492, "percentage": 83.91, "elapsed_time": "0:52:53", "remaining_time": "0:10:08", "throughput": 2705.67, "total_tokens": 8586080}
|
| 6419 |
+
{"current_steps": 32015, "total_steps": 38150, "loss": 0.1017, "lr": 3.837677769260273e-06, "epoch": 8.391874180865006, "percentage": 83.92, "elapsed_time": "0:52:53", "remaining_time": "0:10:08", "throughput": 2705.72, "total_tokens": 8587424}
|
| 6420 |
+
{"current_steps": 32020, "total_steps": 38150, "loss": 0.0929, "lr": 3.8315907747831e-06, "epoch": 8.393184796854522, "percentage": 83.93, "elapsed_time": "0:52:54", "remaining_time": "0:10:07", "throughput": 2705.74, "total_tokens": 8588720}
|
| 6421 |
+
{"current_steps": 32025, "total_steps": 38150, "loss": 0.1565, "lr": 3.825508210817402e-06, "epoch": 8.394495412844037, "percentage": 83.94, "elapsed_time": "0:52:54", "remaining_time": "0:10:07", "throughput": 2705.79, "total_tokens": 8590144}
|
| 6422 |
+
{"current_steps": 32030, "total_steps": 38150, "loss": 0.1308, "lr": 3.8194300786362465e-06, "epoch": 8.395806028833551, "percentage": 83.96, "elapsed_time": "0:52:55", "remaining_time": "0:10:06", "throughput": 2705.85, "total_tokens": 8591520}
|
| 6423 |
+
{"current_steps": 32035, "total_steps": 38150, "loss": 0.1685, "lr": 3.8133563795117816e-06, "epoch": 8.397116644823067, "percentage": 83.97, "elapsed_time": "0:52:55", "remaining_time": "0:10:06", "throughput": 2705.87, "total_tokens": 8592816}
|
| 6424 |
+
{"current_steps": 32040, "total_steps": 38150, "loss": 0.1645, "lr": 3.807287114715216e-06, "epoch": 8.398427260812582, "percentage": 83.98, "elapsed_time": "0:52:56", "remaining_time": "0:10:05", "throughput": 2705.83, "total_tokens": 8593824}
|
| 6425 |
+
{"current_steps": 32045, "total_steps": 38150, "loss": 0.2185, "lr": 3.801222285516842e-06, "epoch": 8.399737876802098, "percentage": 84.0, "elapsed_time": "0:52:56", "remaining_time": "0:10:05", "throughput": 2705.86, "total_tokens": 8595040}
|
| 6426 |
+
{"current_steps": 32050, "total_steps": 38150, "loss": 0.1715, "lr": 3.7951618931860112e-06, "epoch": 8.401048492791611, "percentage": 84.01, "elapsed_time": "0:52:56", "remaining_time": "0:10:04", "throughput": 2705.88, "total_tokens": 8596240}
|
| 6427 |
+
{"current_steps": 32055, "total_steps": 38150, "loss": 0.1847, "lr": 3.789105938991161e-06, "epoch": 8.402359108781127, "percentage": 84.02, "elapsed_time": "0:52:57", "remaining_time": "0:10:04", "throughput": 2705.9, "total_tokens": 8597424}
|
| 6428 |
+
{"current_steps": 32060, "total_steps": 38150, "loss": 0.1153, "lr": 3.7830544241997864e-06, "epoch": 8.403669724770642, "percentage": 84.04, "elapsed_time": "0:52:57", "remaining_time": "0:10:03", "throughput": 2705.89, "total_tokens": 8598512}
|
| 6429 |
+
{"current_steps": 32065, "total_steps": 38150, "loss": 0.119, "lr": 3.7770073500784596e-06, "epoch": 8.404980340760158, "percentage": 84.05, "elapsed_time": "0:52:58", "remaining_time": "0:10:03", "throughput": 2705.87, "total_tokens": 8599568}
|
| 6430 |
+
{"current_steps": 32070, "total_steps": 38150, "loss": 0.1293, "lr": 3.7709647178928246e-06, "epoch": 8.406290956749672, "percentage": 84.06, "elapsed_time": "0:52:58", "remaining_time": "0:10:02", "throughput": 2705.88, "total_tokens": 8600752}
|
| 6431 |
+
{"current_steps": 32075, "total_steps": 38150, "loss": 0.0827, "lr": 3.7649265289076003e-06, "epoch": 8.407601572739187, "percentage": 84.08, "elapsed_time": "0:52:58", "remaining_time": "0:10:02", "throughput": 2705.81, "total_tokens": 8601632}
|
| 6432 |
+
{"current_steps": 32080, "total_steps": 38150, "loss": 0.2435, "lr": 3.7588927843865556e-06, "epoch": 8.408912188728703, "percentage": 84.09, "elapsed_time": "0:52:59", "remaining_time": "0:10:01", "throughput": 2705.9, "total_tokens": 8603216}
|
| 6433 |
+
{"current_steps": 32085, "total_steps": 38150, "loss": 0.1845, "lr": 3.7528634855925475e-06, "epoch": 8.410222804718218, "percentage": 84.1, "elapsed_time": "0:52:59", "remaining_time": "0:10:01", "throughput": 2705.96, "total_tokens": 8604672}
|
| 6434 |
+
{"current_steps": 32090, "total_steps": 38150, "loss": 0.0866, "lr": 3.7468386337875028e-06, "epoch": 8.411533420707732, "percentage": 84.12, "elapsed_time": "0:53:00", "remaining_time": "0:10:00", "throughput": 2705.97, "total_tokens": 8605856}
|
| 6435 |
+
{"current_steps": 32095, "total_steps": 38150, "loss": 0.1453, "lr": 3.7408182302324014e-06, "epoch": 8.412844036697248, "percentage": 84.13, "elapsed_time": "0:53:00", "remaining_time": "0:10:00", "throughput": 2705.95, "total_tokens": 8606912}
|
| 6436 |
+
{"current_steps": 32100, "total_steps": 38150, "loss": 0.2066, "lr": 3.734802276187319e-06, "epoch": 8.414154652686763, "percentage": 84.14, "elapsed_time": "0:53:01", "remaining_time": "0:09:59", "throughput": 2706.07, "total_tokens": 8608640}
|
| 6437 |
+
{"current_steps": 32105, "total_steps": 38150, "loss": 0.134, "lr": 3.7287907729113754e-06, "epoch": 8.415465268676279, "percentage": 84.15, "elapsed_time": "0:53:01", "remaining_time": "0:09:59", "throughput": 2706.08, "total_tokens": 8609920}
|
| 6438 |
+
{"current_steps": 32110, "total_steps": 38150, "loss": 0.1552, "lr": 3.722783721662773e-06, "epoch": 8.416775884665793, "percentage": 84.17, "elapsed_time": "0:53:02", "remaining_time": "0:09:58", "throughput": 2706.15, "total_tokens": 8611328}
|
| 6439 |
+
{"current_steps": 32115, "total_steps": 38150, "loss": 0.098, "lr": 3.7167811236987704e-06, "epoch": 8.418086500655308, "percentage": 84.18, "elapsed_time": "0:53:02", "remaining_time": "0:09:58", "throughput": 2706.19, "total_tokens": 8612608}
|
| 6440 |
+
{"current_steps": 32120, "total_steps": 38150, "loss": 0.1691, "lr": 3.710782980275704e-06, "epoch": 8.419397116644824, "percentage": 84.19, "elapsed_time": "0:53:03", "remaining_time": "0:09:57", "throughput": 2706.25, "total_tokens": 8614016}
|
| 6441 |
+
{"current_steps": 32125, "total_steps": 38150, "loss": 0.2358, "lr": 3.704789292648975e-06, "epoch": 8.420707732634337, "percentage": 84.21, "elapsed_time": "0:53:03", "remaining_time": "0:09:57", "throughput": 2706.34, "total_tokens": 8615616}
|
| 6442 |
+
{"current_steps": 32130, "total_steps": 38150, "loss": 0.0962, "lr": 3.6988000620730516e-06, "epoch": 8.422018348623853, "percentage": 84.22, "elapsed_time": "0:53:03", "remaining_time": "0:09:56", "throughput": 2706.34, "total_tokens": 8616832}
|
| 6443 |
+
{"current_steps": 32135, "total_steps": 38150, "loss": 0.1602, "lr": 3.6928152898014684e-06, "epoch": 8.423328964613368, "percentage": 84.23, "elapsed_time": "0:53:04", "remaining_time": "0:09:56", "throughput": 2706.46, "total_tokens": 8618560}
|
| 6444 |
+
{"current_steps": 32140, "total_steps": 38150, "loss": 0.1698, "lr": 3.686834977086831e-06, "epoch": 8.424639580602884, "percentage": 84.25, "elapsed_time": "0:53:04", "remaining_time": "0:09:55", "throughput": 2706.46, "total_tokens": 8619776}
|
| 6445 |
+
{"current_steps": 32145, "total_steps": 38150, "loss": 0.1564, "lr": 3.680859125180805e-06, "epoch": 8.425950196592398, "percentage": 84.26, "elapsed_time": "0:53:05", "remaining_time": "0:09:55", "throughput": 2706.48, "total_tokens": 8621008}
|
| 6446 |
+
{"current_steps": 32150, "total_steps": 38150, "loss": 0.2266, "lr": 3.674887735334126e-06, "epoch": 8.427260812581913, "percentage": 84.27, "elapsed_time": "0:53:05", "remaining_time": "0:09:54", "throughput": 2706.63, "total_tokens": 8622928}
|
| 6447 |
+
{"current_steps": 32155, "total_steps": 38150, "loss": 0.1436, "lr": 3.6689208087966e-06, "epoch": 8.428571428571429, "percentage": 84.29, "elapsed_time": "0:53:06", "remaining_time": "0:09:54", "throughput": 2706.68, "total_tokens": 8624320}
|
| 6448 |
+
{"current_steps": 32160, "total_steps": 38150, "loss": 0.2022, "lr": 3.6629583468170914e-06, "epoch": 8.429882044560944, "percentage": 84.3, "elapsed_time": "0:53:06", "remaining_time": "0:09:53", "throughput": 2706.73, "total_tokens": 8625648}
|
| 6449 |
+
{"current_steps": 32165, "total_steps": 38150, "loss": 0.1165, "lr": 3.65700035064353e-06, "epoch": 8.431192660550458, "percentage": 84.31, "elapsed_time": "0:53:07", "remaining_time": "0:09:53", "throughput": 2706.79, "total_tokens": 8627152}
|
| 6450 |
+
{"current_steps": 32170, "total_steps": 38150, "loss": 0.1118, "lr": 3.6510468215229215e-06, "epoch": 8.432503276539974, "percentage": 84.33, "elapsed_time": "0:53:07", "remaining_time": "0:09:52", "throughput": 2706.81, "total_tokens": 8628464}
|
| 6451 |
+
{"current_steps": 32175, "total_steps": 38150, "loss": 0.0751, "lr": 3.6450977607013217e-06, "epoch": 8.43381389252949, "percentage": 84.34, "elapsed_time": "0:53:08", "remaining_time": "0:09:52", "throughput": 2706.76, "total_tokens": 8629408}
|
| 6452 |
+
{"current_steps": 32180, "total_steps": 38150, "loss": 0.1817, "lr": 3.6391531694238647e-06, "epoch": 8.435124508519005, "percentage": 84.35, "elapsed_time": "0:53:08", "remaining_time": "0:09:51", "throughput": 2706.79, "total_tokens": 8630656}
|
| 6453 |
+
{"current_steps": 32185, "total_steps": 38150, "loss": 0.1237, "lr": 3.6332130489347387e-06, "epoch": 8.436435124508519, "percentage": 84.36, "elapsed_time": "0:53:08", "remaining_time": "0:09:51", "throughput": 2706.83, "total_tokens": 8631936}
|
| 6454 |
+
{"current_steps": 32190, "total_steps": 38150, "loss": 0.2248, "lr": 3.627277400477211e-06, "epoch": 8.437745740498034, "percentage": 84.38, "elapsed_time": "0:53:09", "remaining_time": "0:09:50", "throughput": 2706.95, "total_tokens": 8633584}
|
| 6455 |
+
{"current_steps": 32195, "total_steps": 38150, "loss": 0.11, "lr": 3.6213462252935844e-06, "epoch": 8.43905635648755, "percentage": 84.39, "elapsed_time": "0:53:09", "remaining_time": "0:09:50", "throughput": 2707.11, "total_tokens": 8635584}
|
| 6456 |
+
{"current_steps": 32200, "total_steps": 38150, "loss": 0.2602, "lr": 3.6154195246252575e-06, "epoch": 8.440366972477065, "percentage": 84.4, "elapsed_time": "0:53:10", "remaining_time": "0:09:49", "throughput": 2707.15, "total_tokens": 8636912}
|
| 6457 |
+
{"current_steps": 32205, "total_steps": 38150, "loss": 0.1935, "lr": 3.609497299712672e-06, "epoch": 8.441677588466579, "percentage": 84.42, "elapsed_time": "0:53:10", "remaining_time": "0:09:49", "throughput": 2707.16, "total_tokens": 8638112}
|
| 6458 |
+
{"current_steps": 32210, "total_steps": 38150, "loss": 0.1508, "lr": 3.6035795517953357e-06, "epoch": 8.442988204456094, "percentage": 84.43, "elapsed_time": "0:53:11", "remaining_time": "0:09:48", "throughput": 2707.24, "total_tokens": 8639568}
|
| 6459 |
+
{"current_steps": 32215, "total_steps": 38150, "loss": 0.1197, "lr": 3.597666282111836e-06, "epoch": 8.44429882044561, "percentage": 84.44, "elapsed_time": "0:53:11", "remaining_time": "0:09:48", "throughput": 2707.27, "total_tokens": 8640832}
|
| 6460 |
+
{"current_steps": 32220, "total_steps": 38150, "loss": 0.1654, "lr": 3.5917574918998024e-06, "epoch": 8.445609436435124, "percentage": 84.46, "elapsed_time": "0:53:12", "remaining_time": "0:09:47", "throughput": 2707.34, "total_tokens": 8642192}
|
| 6461 |
+
{"current_steps": 32225, "total_steps": 38150, "loss": 0.1354, "lr": 3.58585318239594e-06, "epoch": 8.44692005242464, "percentage": 84.47, "elapsed_time": "0:53:12", "remaining_time": "0:09:46", "throughput": 2707.32, "total_tokens": 8643328}
|
| 6462 |
+
{"current_steps": 32230, "total_steps": 38150, "loss": 0.1592, "lr": 3.579953354836002e-06, "epoch": 8.448230668414155, "percentage": 84.48, "elapsed_time": "0:53:13", "remaining_time": "0:09:46", "throughput": 2707.29, "total_tokens": 8644400}
|
| 6463 |
+
{"current_steps": 32235, "total_steps": 38150, "loss": 0.11, "lr": 3.5740580104548145e-06, "epoch": 8.44954128440367, "percentage": 84.5, "elapsed_time": "0:53:13", "remaining_time": "0:09:45", "throughput": 2707.36, "total_tokens": 8645920}
|
| 6464 |
+
{"current_steps": 32240, "total_steps": 38150, "loss": 0.1729, "lr": 3.568167150486265e-06, "epoch": 8.450851900393184, "percentage": 84.51, "elapsed_time": "0:53:13", "remaining_time": "0:09:45", "throughput": 2707.39, "total_tokens": 8647152}
|
| 6465 |
+
{"current_steps": 32245, "total_steps": 38150, "loss": 0.0616, "lr": 3.5622807761632985e-06, "epoch": 8.4521625163827, "percentage": 84.52, "elapsed_time": "0:53:14", "remaining_time": "0:09:44", "throughput": 2707.39, "total_tokens": 8648320}
|
| 6466 |
+
{"current_steps": 32250, "total_steps": 38150, "loss": 0.0597, "lr": 3.5563988887179266e-06, "epoch": 8.453473132372215, "percentage": 84.53, "elapsed_time": "0:53:14", "remaining_time": "0:09:44", "throughput": 2707.37, "total_tokens": 8649376}
|
| 6467 |
+
{"current_steps": 32255, "total_steps": 38150, "loss": 0.1387, "lr": 3.5505214893812115e-06, "epoch": 8.45478374836173, "percentage": 84.55, "elapsed_time": "0:53:15", "remaining_time": "0:09:43", "throughput": 2707.48, "total_tokens": 8651136}
|
| 6468 |
+
{"current_steps": 32260, "total_steps": 38150, "loss": 0.1442, "lr": 3.5446485793832918e-06, "epoch": 8.456094364351245, "percentage": 84.56, "elapsed_time": "0:53:15", "remaining_time": "0:09:43", "throughput": 2707.56, "total_tokens": 8652704}
|
| 6469 |
+
{"current_steps": 32265, "total_steps": 38150, "loss": 0.2039, "lr": 3.5387801599533475e-06, "epoch": 8.45740498034076, "percentage": 84.57, "elapsed_time": "0:53:16", "remaining_time": "0:09:43", "throughput": 2707.76, "total_tokens": 8654976}
|
| 6470 |
+
{"current_steps": 32270, "total_steps": 38150, "loss": 0.3272, "lr": 3.5329162323196356e-06, "epoch": 8.458715596330276, "percentage": 84.59, "elapsed_time": "0:53:16", "remaining_time": "0:09:42", "throughput": 2707.86, "total_tokens": 8656640}
|
| 6471 |
+
{"current_steps": 32275, "total_steps": 38150, "loss": 0.2214, "lr": 3.527056797709466e-06, "epoch": 8.460026212319791, "percentage": 84.6, "elapsed_time": "0:53:17", "remaining_time": "0:09:42", "throughput": 2707.89, "total_tokens": 8657968}
|
| 6472 |
+
{"current_steps": 32280, "total_steps": 38150, "loss": 0.2049, "lr": 3.5212018573492047e-06, "epoch": 8.461336828309305, "percentage": 84.61, "elapsed_time": "0:53:17", "remaining_time": "0:09:41", "throughput": 2708.06, "total_tokens": 8660048}
|
| 6473 |
+
{"current_steps": 32285, "total_steps": 38150, "loss": 0.0792, "lr": 3.5153514124642822e-06, "epoch": 8.46264744429882, "percentage": 84.63, "elapsed_time": "0:53:18", "remaining_time": "0:09:41", "throughput": 2708.08, "total_tokens": 8661280}
|
| 6474 |
+
{"current_steps": 32290, "total_steps": 38150, "loss": 0.2166, "lr": 3.5095054642791862e-06, "epoch": 8.463958060288336, "percentage": 84.64, "elapsed_time": "0:53:18", "remaining_time": "0:09:40", "throughput": 2708.24, "total_tokens": 8663200}
|
| 6475 |
+
{"current_steps": 32295, "total_steps": 38150, "loss": 0.1047, "lr": 3.503664014017463e-06, "epoch": 8.46526867627785, "percentage": 84.65, "elapsed_time": "0:53:19", "remaining_time": "0:09:40", "throughput": 2708.33, "total_tokens": 8664768}
|
| 6476 |
+
{"current_steps": 32300, "total_steps": 38150, "loss": 0.1775, "lr": 3.4978270629017207e-06, "epoch": 8.466579292267365, "percentage": 84.67, "elapsed_time": "0:53:19", "remaining_time": "0:09:39", "throughput": 2708.3, "total_tokens": 8665824}
|
| 6477 |
+
{"current_steps": 32305, "total_steps": 38150, "loss": 0.1868, "lr": 3.491994612153618e-06, "epoch": 8.46788990825688, "percentage": 84.68, "elapsed_time": "0:53:20", "remaining_time": "0:09:39", "throughput": 2708.36, "total_tokens": 8667200}
|
| 6478 |
+
{"current_steps": 32310, "total_steps": 38150, "loss": 0.1313, "lr": 3.4861666629938896e-06, "epoch": 8.469200524246396, "percentage": 84.69, "elapsed_time": "0:53:20", "remaining_time": "0:09:38", "throughput": 2708.43, "total_tokens": 8668640}
|
| 6479 |
+
{"current_steps": 32315, "total_steps": 38150, "loss": 0.1158, "lr": 3.4803432166422983e-06, "epoch": 8.47051114023591, "percentage": 84.71, "elapsed_time": "0:53:21", "remaining_time": "0:09:37", "throughput": 2708.42, "total_tokens": 8669696}
|
| 6480 |
+
{"current_steps": 32320, "total_steps": 38150, "loss": 0.1713, "lr": 3.474524274317689e-06, "epoch": 8.471821756225426, "percentage": 84.72, "elapsed_time": "0:53:21", "remaining_time": "0:09:37", "throughput": 2708.42, "total_tokens": 8670848}
|
| 6481 |
+
{"current_steps": 32325, "total_steps": 38150, "loss": 0.1415, "lr": 3.468709837237949e-06, "epoch": 8.473132372214941, "percentage": 84.73, "elapsed_time": "0:53:21", "remaining_time": "0:09:36", "throughput": 2708.42, "total_tokens": 8671968}
|
| 6482 |
+
{"current_steps": 32330, "total_steps": 38150, "loss": 0.1746, "lr": 3.4628999066200427e-06, "epoch": 8.474442988204457, "percentage": 84.74, "elapsed_time": "0:53:22", "remaining_time": "0:09:36", "throughput": 2708.33, "total_tokens": 8672768}
|
| 6483 |
+
{"current_steps": 32335, "total_steps": 38150, "loss": 0.0925, "lr": 3.4570944836799734e-06, "epoch": 8.47575360419397, "percentage": 84.76, "elapsed_time": "0:53:22", "remaining_time": "0:09:35", "throughput": 2708.39, "total_tokens": 8674176}
|
| 6484 |
+
{"current_steps": 32340, "total_steps": 38150, "loss": 0.1788, "lr": 3.4512935696328045e-06, "epoch": 8.477064220183486, "percentage": 84.77, "elapsed_time": "0:53:23", "remaining_time": "0:09:35", "throughput": 2708.46, "total_tokens": 8675696}
|
| 6485 |
+
{"current_steps": 32345, "total_steps": 38150, "loss": 0.1365, "lr": 3.4454971656926655e-06, "epoch": 8.478374836173002, "percentage": 84.78, "elapsed_time": "0:53:23", "remaining_time": "0:09:34", "throughput": 2708.56, "total_tokens": 8677280}
|
| 6486 |
+
{"current_steps": 32350, "total_steps": 38150, "loss": 0.1117, "lr": 3.4397052730727214e-06, "epoch": 8.479685452162517, "percentage": 84.8, "elapsed_time": "0:53:24", "remaining_time": "0:09:34", "throughput": 2708.55, "total_tokens": 8678400}
|
| 6487 |
+
{"current_steps": 32355, "total_steps": 38150, "loss": 0.1695, "lr": 3.4339178929852085e-06, "epoch": 8.48099606815203, "percentage": 84.81, "elapsed_time": "0:53:24", "remaining_time": "0:09:33", "throughput": 2708.59, "total_tokens": 8679696}
|
| 6488 |
+
{"current_steps": 32360, "total_steps": 38150, "loss": 0.1876, "lr": 3.4281350266414158e-06, "epoch": 8.482306684141546, "percentage": 84.82, "elapsed_time": "0:53:24", "remaining_time": "0:09:33", "throughput": 2708.55, "total_tokens": 8680720}
|
| 6489 |
+
{"current_steps": 32365, "total_steps": 38150, "loss": 0.1651, "lr": 3.422356675251692e-06, "epoch": 8.483617300131062, "percentage": 84.84, "elapsed_time": "0:53:25", "remaining_time": "0:09:32", "throughput": 2708.61, "total_tokens": 8682160}
|
| 6490 |
+
{"current_steps": 32370, "total_steps": 38150, "loss": 0.179, "lr": 3.4165828400254314e-06, "epoch": 8.484927916120578, "percentage": 84.85, "elapsed_time": "0:53:25", "remaining_time": "0:09:32", "throughput": 2708.7, "total_tokens": 8683728}
|
| 6491 |
+
{"current_steps": 32375, "total_steps": 38150, "loss": 0.1244, "lr": 3.4108135221710885e-06, "epoch": 8.486238532110091, "percentage": 84.86, "elapsed_time": "0:53:26", "remaining_time": "0:09:31", "throughput": 2708.65, "total_tokens": 8684704}
|
| 6492 |
+
{"current_steps": 32380, "total_steps": 38150, "loss": 0.1629, "lr": 3.405048722896173e-06, "epoch": 8.487549148099607, "percentage": 84.88, "elapsed_time": "0:53:26", "remaining_time": "0:09:31", "throughput": 2708.74, "total_tokens": 8686288}
|
| 6493 |
+
{"current_steps": 32385, "total_steps": 38150, "loss": 0.1607, "lr": 3.3992884434072474e-06, "epoch": 8.488859764089122, "percentage": 84.89, "elapsed_time": "0:53:27", "remaining_time": "0:09:30", "throughput": 2708.73, "total_tokens": 8687408}
|
| 6494 |
+
{"current_steps": 32390, "total_steps": 38150, "loss": 0.1268, "lr": 3.3935326849099253e-06, "epoch": 8.490170380078636, "percentage": 84.9, "elapsed_time": "0:53:27", "remaining_time": "0:09:30", "throughput": 2708.74, "total_tokens": 8688592}
|
| 6495 |
+
{"current_steps": 32395, "total_steps": 38150, "loss": 0.1484, "lr": 3.3877814486088816e-06, "epoch": 8.491480996068152, "percentage": 84.91, "elapsed_time": "0:53:28", "remaining_time": "0:09:29", "throughput": 2708.76, "total_tokens": 8689792}
|
| 6496 |
+
{"current_steps": 32400, "total_steps": 38150, "loss": 0.124, "lr": 3.382034735707834e-06, "epoch": 8.492791612057667, "percentage": 84.93, "elapsed_time": "0:53:28", "remaining_time": "0:09:29", "throughput": 2708.76, "total_tokens": 8690896}
|
| 6497 |
+
{"current_steps": 32405, "total_steps": 38150, "loss": 0.1029, "lr": 3.376292547409568e-06, "epoch": 8.494102228047183, "percentage": 84.94, "elapsed_time": "0:53:28", "remaining_time": "0:09:28", "throughput": 2708.74, "total_tokens": 8691968}
|
| 6498 |
+
{"current_steps": 32410, "total_steps": 38150, "loss": 0.2085, "lr": 3.3705548849159054e-06, "epoch": 8.495412844036696, "percentage": 84.95, "elapsed_time": "0:53:29", "remaining_time": "0:09:28", "throughput": 2708.84, "total_tokens": 8693648}
|
| 6499 |
+
{"current_steps": 32415, "total_steps": 38150, "loss": 0.1569, "lr": 3.364821749427732e-06, "epoch": 8.496723460026212, "percentage": 84.97, "elapsed_time": "0:53:29", "remaining_time": "0:09:27", "throughput": 2708.92, "total_tokens": 8695056}
|
| 6500 |
+
{"current_steps": 32420, "total_steps": 38150, "loss": 0.0791, "lr": 3.3590931421449867e-06, "epoch": 8.498034076015728, "percentage": 84.98, "elapsed_time": "0:53:30", "remaining_time": "0:09:27", "throughput": 2708.96, "total_tokens": 8696368}
|
| 6501 |
+
{"current_steps": 32425, "total_steps": 38150, "loss": 0.0686, "lr": 3.3533690642666595e-06, "epoch": 8.499344692005243, "percentage": 84.99, "elapsed_time": "0:53:30", "remaining_time": "0:09:26", "throughput": 2708.95, "total_tokens": 8697472}
|
| 6502 |
+
{"current_steps": 32430, "total_steps": 38150, "loss": 0.1706, "lr": 3.347649516990778e-06, "epoch": 8.500655307994757, "percentage": 85.01, "elapsed_time": "0:53:31", "remaining_time": "0:09:26", "throughput": 2708.99, "total_tokens": 8698880}
|
| 6503 |
+
{"current_steps": 32435, "total_steps": 38150, "loss": 0.0856, "lr": 3.3419345015144406e-06, "epoch": 8.501965923984272, "percentage": 85.02, "elapsed_time": "0:53:31", "remaining_time": "0:09:25", "throughput": 2709.02, "total_tokens": 8700096}
|
| 6504 |
+
{"current_steps": 32436, "total_steps": 38150, "eval_loss": 0.8556064963340759, "epoch": 8.502228047182175, "percentage": 85.02, "elapsed_time": "0:53:48", "remaining_time": "0:09:28", "throughput": 2695.03, "total_tokens": 8700416}
|
| 6505 |
+
{"current_steps": 32440, "total_steps": 38150, "loss": 0.3147, "lr": 3.3362240190337817e-06, "epoch": 8.503276539973788, "percentage": 85.03, "elapsed_time": "0:53:49", "remaining_time": "0:09:28", "throughput": 2694.0, "total_tokens": 8701568}
|
| 6506 |
+
{"current_steps": 32445, "total_steps": 38150, "loss": 0.1517, "lr": 3.3305180707440124e-06, "epoch": 8.504587155963304, "percentage": 85.05, "elapsed_time": "0:53:50", "remaining_time": "0:09:28", "throughput": 2694.04, "total_tokens": 8702976}
|
| 6507 |
+
{"current_steps": 32450, "total_steps": 38150, "loss": 0.2401, "lr": 3.3248166578393665e-06, "epoch": 8.505897771952817, "percentage": 85.06, "elapsed_time": "0:53:50", "remaining_time": "0:09:27", "throughput": 2694.06, "total_tokens": 8704208}
|
| 6508 |
+
{"current_steps": 32455, "total_steps": 38150, "loss": 0.2879, "lr": 3.319119781513144e-06, "epoch": 8.507208387942333, "percentage": 85.07, "elapsed_time": "0:53:51", "remaining_time": "0:09:27", "throughput": 2694.09, "total_tokens": 8705440}
|
| 6509 |
+
{"current_steps": 32460, "total_steps": 38150, "loss": 0.0348, "lr": 3.313427442957695e-06, "epoch": 8.508519003931848, "percentage": 85.09, "elapsed_time": "0:53:51", "remaining_time": "0:09:26", "throughput": 2694.03, "total_tokens": 8706368}
|
| 6510 |
+
{"current_steps": 32465, "total_steps": 38150, "loss": 0.1652, "lr": 3.3077396433644046e-06, "epoch": 8.509829619921362, "percentage": 85.1, "elapsed_time": "0:53:52", "remaining_time": "0:09:25", "throughput": 2694.15, "total_tokens": 8708048}
|