Training in progress, step 38480
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +382 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:385482200b4e3cfb02246c9ac7ef0cb6796f52ab5a2c779dc640d89e1fe2bf23
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -7333,3 +7333,385 @@
|
|
| 7333 |
{"current_steps": 36570, "total_steps": 38480, "loss": 0.2455, "lr": 0.00022482228898369592, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:26:10", "remaining_time": "0:04:30", "throughput": 1348.55, "total_tokens": 6973328}
|
| 7334 |
{"current_steps": 36575, "total_steps": 38480, "loss": 0.2857, "lr": 0.00022365029241445898, "epoch": 19.00987525987526, "percentage": 95.05, "elapsed_time": "1:26:11", "remaining_time": "0:04:29", "throughput": 1348.55, "total_tokens": 6974256}
|
| 7335 |
{"current_steps": 36580, "total_steps": 38480, "loss": 0.2803, "lr": 0.00022248133569247763, "epoch": 19.012474012474012, "percentage": 95.06, "elapsed_time": "1:26:12", "remaining_time": "0:04:28", "throughput": 1348.57, "total_tokens": 6975248}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7333 |
{"current_steps": 36570, "total_steps": 38480, "loss": 0.2455, "lr": 0.00022482228898369592, "epoch": 19.007276507276508, "percentage": 95.04, "elapsed_time": "1:26:10", "remaining_time": "0:04:30", "throughput": 1348.55, "total_tokens": 6973328}
|
| 7334 |
{"current_steps": 36575, "total_steps": 38480, "loss": 0.2857, "lr": 0.00022365029241445898, "epoch": 19.00987525987526, "percentage": 95.05, "elapsed_time": "1:26:11", "remaining_time": "0:04:29", "throughput": 1348.55, "total_tokens": 6974256}
|
| 7335 |
{"current_steps": 36580, "total_steps": 38480, "loss": 0.2803, "lr": 0.00022248133569247763, "epoch": 19.012474012474012, "percentage": 95.06, "elapsed_time": "1:26:12", "remaining_time": "0:04:28", "throughput": 1348.57, "total_tokens": 6975248}
|
| 7336 |
+
{"current_steps": 36585, "total_steps": 38480, "loss": 0.248, "lr": 0.0002213154190582328, "epoch": 19.015072765072766, "percentage": 95.08, "elapsed_time": "1:26:13", "remaining_time": "0:04:27", "throughput": 1348.59, "total_tokens": 6976272}
|
| 7337 |
+
{"current_steps": 36590, "total_steps": 38480, "loss": 0.2909, "lr": 0.00022015254275158268, "epoch": 19.017671517671516, "percentage": 95.09, "elapsed_time": "1:26:13", "remaining_time": "0:04:27", "throughput": 1348.58, "total_tokens": 6977136}
|
| 7338 |
+
{"current_steps": 36595, "total_steps": 38480, "loss": 0.2737, "lr": 0.00021899270701175754, "epoch": 19.02027027027027, "percentage": 95.1, "elapsed_time": "1:26:14", "remaining_time": "0:04:26", "throughput": 1348.58, "total_tokens": 6978032}
|
| 7339 |
+
{"current_steps": 36600, "total_steps": 38480, "loss": 0.261, "lr": 0.0002178359120773632, "epoch": 19.022869022869024, "percentage": 95.11, "elapsed_time": "1:26:15", "remaining_time": "0:04:25", "throughput": 1348.59, "total_tokens": 6978960}
|
| 7340 |
+
{"current_steps": 36605, "total_steps": 38480, "loss": 0.2498, "lr": 0.00021668215818638093, "epoch": 19.025467775467774, "percentage": 95.13, "elapsed_time": "1:26:15", "remaining_time": "0:04:25", "throughput": 1348.59, "total_tokens": 6979888}
|
| 7341 |
+
{"current_steps": 36610, "total_steps": 38480, "loss": 0.2481, "lr": 0.00021553144557616254, "epoch": 19.028066528066528, "percentage": 95.14, "elapsed_time": "1:26:16", "remaining_time": "0:04:24", "throughput": 1348.61, "total_tokens": 6980880}
|
| 7342 |
+
{"current_steps": 36615, "total_steps": 38480, "loss": 0.2253, "lr": 0.00021438377448344202, "epoch": 19.030665280665282, "percentage": 95.15, "elapsed_time": "1:26:17", "remaining_time": "0:04:23", "throughput": 1348.63, "total_tokens": 6981872}
|
| 7343 |
+
{"current_steps": 36620, "total_steps": 38480, "loss": 0.239, "lr": 0.00021323914514431884, "epoch": 19.033264033264032, "percentage": 95.17, "elapsed_time": "1:26:17", "remaining_time": "0:04:22", "throughput": 1348.63, "total_tokens": 6982768}
|
| 7344 |
+
{"current_steps": 36625, "total_steps": 38480, "loss": 0.2515, "lr": 0.0002120975577942713, "epoch": 19.035862785862786, "percentage": 95.18, "elapsed_time": "1:26:18", "remaining_time": "0:04:22", "throughput": 1348.64, "total_tokens": 6983760}
|
| 7345 |
+
{"current_steps": 36630, "total_steps": 38480, "loss": 0.2556, "lr": 0.00021095901266815153, "epoch": 19.03846153846154, "percentage": 95.19, "elapsed_time": "1:26:19", "remaining_time": "0:04:21", "throughput": 1348.65, "total_tokens": 6984720}
|
| 7346 |
+
{"current_steps": 36635, "total_steps": 38480, "loss": 0.3254, "lr": 0.0002098235100001855, "epoch": 19.04106029106029, "percentage": 95.21, "elapsed_time": "1:26:19", "remaining_time": "0:04:20", "throughput": 1348.66, "total_tokens": 6985680}
|
| 7347 |
+
{"current_steps": 36640, "total_steps": 38480, "loss": 0.2521, "lr": 0.00020869105002397303, "epoch": 19.043659043659044, "percentage": 95.22, "elapsed_time": "1:26:20", "remaining_time": "0:04:20", "throughput": 1348.67, "total_tokens": 6986608}
|
| 7348 |
+
{"current_steps": 36645, "total_steps": 38480, "loss": 0.1984, "lr": 0.00020756163297248775, "epoch": 19.046257796257795, "percentage": 95.23, "elapsed_time": "1:26:21", "remaining_time": "0:04:19", "throughput": 1348.68, "total_tokens": 6987568}
|
| 7349 |
+
{"current_steps": 36650, "total_steps": 38480, "loss": 0.2462, "lr": 0.00020643525907807712, "epoch": 19.04885654885655, "percentage": 95.24, "elapsed_time": "1:26:21", "remaining_time": "0:04:18", "throughput": 1348.68, "total_tokens": 6988496}
|
| 7350 |
+
{"current_steps": 36655, "total_steps": 38480, "loss": 0.2312, "lr": 0.0002053119285724658, "epoch": 19.051455301455302, "percentage": 95.26, "elapsed_time": "1:26:22", "remaining_time": "0:04:18", "throughput": 1348.68, "total_tokens": 6989392}
|
| 7351 |
+
{"current_steps": 36660, "total_steps": 38480, "loss": 0.3075, "lr": 0.0002041916416867473, "epoch": 19.054054054054053, "percentage": 95.27, "elapsed_time": "1:26:23", "remaining_time": "0:04:17", "throughput": 1348.69, "total_tokens": 6990320}
|
| 7352 |
+
{"current_steps": 36665, "total_steps": 38480, "loss": 0.2566, "lr": 0.00020307439865138886, "epoch": 19.056652806652806, "percentage": 95.28, "elapsed_time": "1:26:23", "remaining_time": "0:04:16", "throughput": 1348.7, "total_tokens": 6991280}
|
| 7353 |
+
{"current_steps": 36670, "total_steps": 38480, "loss": 0.2841, "lr": 0.00020196019969624, "epoch": 19.05925155925156, "percentage": 95.3, "elapsed_time": "1:26:24", "remaining_time": "0:04:15", "throughput": 1348.7, "total_tokens": 6992208}
|
| 7354 |
+
{"current_steps": 36675, "total_steps": 38480, "loss": 0.2325, "lr": 0.00020084904505051403, "epoch": 19.06185031185031, "percentage": 95.31, "elapsed_time": "1:26:25", "remaining_time": "0:04:15", "throughput": 1348.72, "total_tokens": 6993168}
|
| 7355 |
+
{"current_steps": 36680, "total_steps": 38480, "loss": 0.2273, "lr": 0.0001997409349428031, "epoch": 19.064449064449065, "percentage": 95.32, "elapsed_time": "1:26:25", "remaining_time": "0:04:14", "throughput": 1348.73, "total_tokens": 6994128}
|
| 7356 |
+
{"current_steps": 36685, "total_steps": 38480, "loss": 0.2609, "lr": 0.00019863586960107147, "epoch": 19.06704781704782, "percentage": 95.34, "elapsed_time": "1:26:26", "remaining_time": "0:04:13", "throughput": 1348.74, "total_tokens": 6995088}
|
| 7357 |
+
{"current_steps": 36690, "total_steps": 38480, "loss": 0.2466, "lr": 0.00019753384925265738, "epoch": 19.06964656964657, "percentage": 95.35, "elapsed_time": "1:26:27", "remaining_time": "0:04:13", "throughput": 1348.74, "total_tokens": 6995984}
|
| 7358 |
+
{"current_steps": 36695, "total_steps": 38480, "loss": 0.2974, "lr": 0.00019643487412427274, "epoch": 19.072245322245323, "percentage": 95.36, "elapsed_time": "1:26:27", "remaining_time": "0:04:12", "throughput": 1348.74, "total_tokens": 6996912}
|
| 7359 |
+
{"current_steps": 36700, "total_steps": 38480, "loss": 0.2734, "lr": 0.0001953389444420034, "epoch": 19.074844074844076, "percentage": 95.37, "elapsed_time": "1:26:28", "remaining_time": "0:04:11", "throughput": 1348.75, "total_tokens": 6997840}
|
| 7360 |
+
{"current_steps": 36705, "total_steps": 38480, "loss": 0.2417, "lr": 0.0001942460604313073, "epoch": 19.077442827442827, "percentage": 95.39, "elapsed_time": "1:26:29", "remaining_time": "0:04:10", "throughput": 1348.76, "total_tokens": 6998832}
|
| 7361 |
+
{"current_steps": 36710, "total_steps": 38480, "loss": 0.237, "lr": 0.000193156222317018, "epoch": 19.08004158004158, "percentage": 95.4, "elapsed_time": "1:26:29", "remaining_time": "0:04:10", "throughput": 1348.78, "total_tokens": 6999824}
|
| 7362 |
+
{"current_steps": 36715, "total_steps": 38480, "loss": 0.2604, "lr": 0.00019206943032333944, "epoch": 19.08264033264033, "percentage": 95.41, "elapsed_time": "1:26:30", "remaining_time": "0:04:09", "throughput": 1348.79, "total_tokens": 7000784}
|
| 7363 |
+
{"current_steps": 36720, "total_steps": 38480, "loss": 0.2534, "lr": 0.00019098568467385278, "epoch": 19.085239085239085, "percentage": 95.43, "elapsed_time": "1:26:31", "remaining_time": "0:04:08", "throughput": 1348.79, "total_tokens": 7001712}
|
| 7364 |
+
{"current_steps": 36725, "total_steps": 38480, "loss": 0.2501, "lr": 0.00018990498559150803, "epoch": 19.08783783783784, "percentage": 95.44, "elapsed_time": "1:26:31", "remaining_time": "0:04:08", "throughput": 1348.81, "total_tokens": 7002704}
|
| 7365 |
+
{"current_steps": 36730, "total_steps": 38480, "loss": 0.2528, "lr": 0.00018882733329863232, "epoch": 19.09043659043659, "percentage": 95.45, "elapsed_time": "1:26:32", "remaining_time": "0:04:07", "throughput": 1348.83, "total_tokens": 7003728}
|
| 7366 |
+
{"current_steps": 36735, "total_steps": 38480, "loss": 0.2793, "lr": 0.00018775272801692332, "epoch": 19.093035343035343, "percentage": 95.47, "elapsed_time": "1:26:33", "remaining_time": "0:04:06", "throughput": 1348.84, "total_tokens": 7004656}
|
| 7367 |
+
{"current_steps": 36740, "total_steps": 38480, "loss": 0.2465, "lr": 0.0001866811699674542, "epoch": 19.095634095634097, "percentage": 95.48, "elapsed_time": "1:26:33", "remaining_time": "0:04:05", "throughput": 1348.86, "total_tokens": 7005680}
|
| 7368 |
+
{"current_steps": 36745, "total_steps": 38480, "loss": 0.2679, "lr": 0.00018561265937067027, "epoch": 19.098232848232847, "percentage": 95.49, "elapsed_time": "1:26:34", "remaining_time": "0:04:05", "throughput": 1348.87, "total_tokens": 7006640}
|
| 7369 |
+
{"current_steps": 36750, "total_steps": 38480, "loss": 0.2324, "lr": 0.0001845471964463874, "epoch": 19.1008316008316, "percentage": 95.5, "elapsed_time": "1:26:35", "remaining_time": "0:04:04", "throughput": 1348.89, "total_tokens": 7007664}
|
| 7370 |
+
{"current_steps": 36755, "total_steps": 38480, "loss": 0.266, "lr": 0.00018348478141379688, "epoch": 19.103430353430355, "percentage": 95.52, "elapsed_time": "1:26:35", "remaining_time": "0:04:03", "throughput": 1348.89, "total_tokens": 7008560}
|
| 7371 |
+
{"current_steps": 36760, "total_steps": 38480, "loss": 0.1817, "lr": 0.00018242541449146554, "epoch": 19.106029106029105, "percentage": 95.53, "elapsed_time": "1:26:36", "remaining_time": "0:04:03", "throughput": 1348.9, "total_tokens": 7009520}
|
| 7372 |
+
{"current_steps": 36765, "total_steps": 38480, "loss": 0.2599, "lr": 0.00018136909589733073, "epoch": 19.10862785862786, "percentage": 95.54, "elapsed_time": "1:26:37", "remaining_time": "0:04:02", "throughput": 1348.91, "total_tokens": 7010448}
|
| 7373 |
+
{"current_steps": 36770, "total_steps": 38480, "loss": 0.2045, "lr": 0.00018031582584869697, "epoch": 19.111226611226613, "percentage": 95.56, "elapsed_time": "1:26:37", "remaining_time": "0:04:01", "throughput": 1348.93, "total_tokens": 7011472}
|
| 7374 |
+
{"current_steps": 36775, "total_steps": 38480, "loss": 0.2959, "lr": 0.0001792656045622526, "epoch": 19.113825363825363, "percentage": 95.57, "elapsed_time": "1:26:38", "remaining_time": "0:04:01", "throughput": 1348.93, "total_tokens": 7012368}
|
| 7375 |
+
{"current_steps": 36780, "total_steps": 38480, "loss": 0.2426, "lr": 0.00017821843225404975, "epoch": 19.116424116424117, "percentage": 95.58, "elapsed_time": "1:26:39", "remaining_time": "0:04:00", "throughput": 1348.92, "total_tokens": 7013232}
|
| 7376 |
+
{"current_steps": 36785, "total_steps": 38480, "loss": 0.2826, "lr": 0.00017717430913951948, "epoch": 19.11902286902287, "percentage": 95.6, "elapsed_time": "1:26:39", "remaining_time": "0:03:59", "throughput": 1348.95, "total_tokens": 7014256}
|
| 7377 |
+
{"current_steps": 36790, "total_steps": 38480, "loss": 0.2624, "lr": 0.00017613323543345992, "epoch": 19.12162162162162, "percentage": 95.61, "elapsed_time": "1:26:40", "remaining_time": "0:03:58", "throughput": 1348.95, "total_tokens": 7015152}
|
| 7378 |
+
{"current_steps": 36795, "total_steps": 38480, "loss": 0.2605, "lr": 0.00017509521135004646, "epoch": 19.124220374220375, "percentage": 95.62, "elapsed_time": "1:26:41", "remaining_time": "0:03:58", "throughput": 1348.94, "total_tokens": 7016016}
|
| 7379 |
+
{"current_steps": 36800, "total_steps": 38480, "loss": 0.2942, "lr": 0.00017406023710282492, "epoch": 19.126819126819125, "percentage": 95.63, "elapsed_time": "1:26:41", "remaining_time": "0:03:57", "throughput": 1348.94, "total_tokens": 7016944}
|
| 7380 |
+
{"current_steps": 36805, "total_steps": 38480, "loss": 0.2017, "lr": 0.00017302831290471498, "epoch": 19.12941787941788, "percentage": 95.65, "elapsed_time": "1:26:42", "remaining_time": "0:03:56", "throughput": 1348.95, "total_tokens": 7017872}
|
| 7381 |
+
{"current_steps": 36810, "total_steps": 38480, "loss": 0.2894, "lr": 0.00017199943896800685, "epoch": 19.132016632016633, "percentage": 95.66, "elapsed_time": "1:26:43", "remaining_time": "0:03:56", "throughput": 1348.98, "total_tokens": 7018960}
|
| 7382 |
+
{"current_steps": 36815, "total_steps": 38480, "loss": 0.2809, "lr": 0.0001709736155043645, "epoch": 19.134615384615383, "percentage": 95.67, "elapsed_time": "1:26:43", "remaining_time": "0:03:55", "throughput": 1349.0, "total_tokens": 7019984}
|
| 7383 |
+
{"current_steps": 36820, "total_steps": 38480, "loss": 0.219, "lr": 0.00016995084272482586, "epoch": 19.137214137214137, "percentage": 95.69, "elapsed_time": "1:26:44", "remaining_time": "0:03:54", "throughput": 1349.01, "total_tokens": 7020912}
|
| 7384 |
+
{"current_steps": 36825, "total_steps": 38480, "loss": 0.2805, "lr": 0.00016893112083979755, "epoch": 19.13981288981289, "percentage": 95.7, "elapsed_time": "1:26:45", "remaining_time": "0:03:53", "throughput": 1349.01, "total_tokens": 7021808}
|
| 7385 |
+
{"current_steps": 36830, "total_steps": 38480, "loss": 0.2431, "lr": 0.00016791445005906347, "epoch": 19.14241164241164, "percentage": 95.71, "elapsed_time": "1:26:45", "remaining_time": "0:03:53", "throughput": 1349.01, "total_tokens": 7022736}
|
| 7386 |
+
{"current_steps": 36835, "total_steps": 38480, "loss": 0.253, "lr": 0.00016690083059177295, "epoch": 19.145010395010395, "percentage": 95.73, "elapsed_time": "1:26:46", "remaining_time": "0:03:52", "throughput": 1349.01, "total_tokens": 7023632}
|
| 7387 |
+
{"current_steps": 36840, "total_steps": 38480, "loss": 0.216, "lr": 0.00016589026264645423, "epoch": 19.14760914760915, "percentage": 95.74, "elapsed_time": "1:26:47", "remaining_time": "0:03:51", "throughput": 1349.03, "total_tokens": 7024656}
|
| 7388 |
+
{"current_steps": 36845, "total_steps": 38480, "loss": 0.2235, "lr": 0.0001648827464310043, "epoch": 19.1502079002079, "percentage": 95.75, "elapsed_time": "1:26:47", "remaining_time": "0:03:51", "throughput": 1349.06, "total_tokens": 7025712}
|
| 7389 |
+
{"current_steps": 36850, "total_steps": 38480, "loss": 0.2978, "lr": 0.00016387828215269572, "epoch": 19.152806652806653, "percentage": 95.76, "elapsed_time": "1:26:48", "remaining_time": "0:03:50", "throughput": 1349.06, "total_tokens": 7026608}
|
| 7390 |
+
{"current_steps": 36855, "total_steps": 38480, "loss": 0.2487, "lr": 0.0001628768700181682, "epoch": 19.155405405405407, "percentage": 95.78, "elapsed_time": "1:26:49", "remaining_time": "0:03:49", "throughput": 1349.07, "total_tokens": 7027568}
|
| 7391 |
+
{"current_steps": 36860, "total_steps": 38480, "loss": 0.2974, "lr": 0.00016187851023343524, "epoch": 19.158004158004157, "percentage": 95.79, "elapsed_time": "1:26:49", "remaining_time": "0:03:48", "throughput": 1349.07, "total_tokens": 7028496}
|
| 7392 |
+
{"current_steps": 36865, "total_steps": 38480, "loss": 0.2873, "lr": 0.00016088320300388593, "epoch": 19.16060291060291, "percentage": 95.8, "elapsed_time": "1:26:50", "remaining_time": "0:03:48", "throughput": 1349.09, "total_tokens": 7029520}
|
| 7393 |
+
{"current_steps": 36870, "total_steps": 38480, "loss": 0.228, "lr": 0.0001598909485342781, "epoch": 19.16320166320166, "percentage": 95.82, "elapsed_time": "1:26:51", "remaining_time": "0:03:47", "throughput": 1349.1, "total_tokens": 7030480}
|
| 7394 |
+
{"current_steps": 36875, "total_steps": 38480, "loss": 0.2531, "lr": 0.00015890174702874017, "epoch": 19.165800415800415, "percentage": 95.83, "elapsed_time": "1:26:51", "remaining_time": "0:03:46", "throughput": 1349.11, "total_tokens": 7031440}
|
| 7395 |
+
{"current_steps": 36880, "total_steps": 38480, "loss": 0.2579, "lr": 0.00015791559869077597, "epoch": 19.16839916839917, "percentage": 95.84, "elapsed_time": "1:26:52", "remaining_time": "0:03:46", "throughput": 1349.12, "total_tokens": 7032368}
|
| 7396 |
+
{"current_steps": 36885, "total_steps": 38480, "loss": 0.2692, "lr": 0.00015693250372325994, "epoch": 19.17099792099792, "percentage": 95.85, "elapsed_time": "1:26:53", "remaining_time": "0:03:45", "throughput": 1349.13, "total_tokens": 7033328}
|
| 7397 |
+
{"current_steps": 36890, "total_steps": 38480, "loss": 0.2628, "lr": 0.0001559524623284386, "epoch": 19.173596673596673, "percentage": 95.87, "elapsed_time": "1:26:53", "remaining_time": "0:03:44", "throughput": 1349.14, "total_tokens": 7034256}
|
| 7398 |
+
{"current_steps": 36895, "total_steps": 38480, "loss": 0.2798, "lr": 0.00015497547470792904, "epoch": 19.176195426195427, "percentage": 95.88, "elapsed_time": "1:26:54", "remaining_time": "0:03:44", "throughput": 1349.13, "total_tokens": 7035120}
|
| 7399 |
+
{"current_steps": 36900, "total_steps": 38480, "loss": 0.2585, "lr": 0.00015400154106272044, "epoch": 19.178794178794178, "percentage": 95.89, "elapsed_time": "1:26:55", "remaining_time": "0:03:43", "throughput": 1349.13, "total_tokens": 7036016}
|
| 7400 |
+
{"current_steps": 36905, "total_steps": 38480, "loss": 0.243, "lr": 0.00015303066159317425, "epoch": 19.18139293139293, "percentage": 95.91, "elapsed_time": "1:26:55", "remaining_time": "0:03:42", "throughput": 1349.13, "total_tokens": 7036944}
|
| 7401 |
+
{"current_steps": 36910, "total_steps": 38480, "loss": 0.2988, "lr": 0.00015206283649902397, "epoch": 19.183991683991685, "percentage": 95.92, "elapsed_time": "1:26:56", "remaining_time": "0:03:41", "throughput": 1349.14, "total_tokens": 7037904}
|
| 7402 |
+
{"current_steps": 36915, "total_steps": 38480, "loss": 0.302, "lr": 0.000151098065979372, "epoch": 19.186590436590436, "percentage": 95.93, "elapsed_time": "1:26:57", "remaining_time": "0:03:41", "throughput": 1349.13, "total_tokens": 7038736}
|
| 7403 |
+
{"current_steps": 36920, "total_steps": 38480, "loss": 0.2311, "lr": 0.00015013635023269965, "epoch": 19.18918918918919, "percentage": 95.95, "elapsed_time": "1:26:57", "remaining_time": "0:03:40", "throughput": 1349.14, "total_tokens": 7039696}
|
| 7404 |
+
{"current_steps": 36925, "total_steps": 38480, "loss": 0.2286, "lr": 0.00014917768945685027, "epoch": 19.191787941787943, "percentage": 95.96, "elapsed_time": "1:26:58", "remaining_time": "0:03:39", "throughput": 1349.14, "total_tokens": 7040560}
|
| 7405 |
+
{"current_steps": 36930, "total_steps": 38480, "loss": 0.2044, "lr": 0.00014822208384904445, "epoch": 19.194386694386694, "percentage": 95.97, "elapsed_time": "1:26:59", "remaining_time": "0:03:39", "throughput": 1349.15, "total_tokens": 7041552}
|
| 7406 |
+
{"current_steps": 36935, "total_steps": 38480, "loss": 0.2905, "lr": 0.0001472695336058749, "epoch": 19.196985446985448, "percentage": 95.98, "elapsed_time": "1:26:59", "remaining_time": "0:03:38", "throughput": 1349.16, "total_tokens": 7042480}
|
| 7407 |
+
{"current_steps": 36940, "total_steps": 38480, "loss": 0.2852, "lr": 0.0001463200389233016, "epoch": 19.1995841995842, "percentage": 96.0, "elapsed_time": "1:27:00", "remaining_time": "0:03:37", "throughput": 1349.17, "total_tokens": 7043472}
|
| 7408 |
+
{"current_steps": 36945, "total_steps": 38480, "loss": 0.2749, "lr": 0.00014537359999665988, "epoch": 19.20218295218295, "percentage": 96.01, "elapsed_time": "1:27:01", "remaining_time": "0:03:36", "throughput": 1349.2, "total_tokens": 7044528}
|
| 7409 |
+
{"current_steps": 36950, "total_steps": 38480, "loss": 0.309, "lr": 0.0001444302170206524, "epoch": 19.204781704781706, "percentage": 96.02, "elapsed_time": "1:27:01", "remaining_time": "0:03:36", "throughput": 1349.21, "total_tokens": 7045456}
|
| 7410 |
+
{"current_steps": 36955, "total_steps": 38480, "loss": 0.2772, "lr": 0.0001434898901893572, "epoch": 19.207380457380456, "percentage": 96.04, "elapsed_time": "1:27:02", "remaining_time": "0:03:35", "throughput": 1349.21, "total_tokens": 7046352}
|
| 7411 |
+
{"current_steps": 36960, "total_steps": 38480, "loss": 0.2165, "lr": 0.00014255261969622457, "epoch": 19.20997920997921, "percentage": 96.05, "elapsed_time": "1:27:03", "remaining_time": "0:03:34", "throughput": 1349.21, "total_tokens": 7047280}
|
| 7412 |
+
{"current_steps": 36965, "total_steps": 38480, "loss": 0.3, "lr": 0.0001416184057340669, "epoch": 19.212577962577964, "percentage": 96.06, "elapsed_time": "1:27:03", "remaining_time": "0:03:34", "throughput": 1349.23, "total_tokens": 7048272}
|
| 7413 |
+
{"current_steps": 36970, "total_steps": 38480, "loss": 0.2822, "lr": 0.00014068724849508051, "epoch": 19.215176715176714, "percentage": 96.08, "elapsed_time": "1:27:04", "remaining_time": "0:03:33", "throughput": 1349.23, "total_tokens": 7049200}
|
| 7414 |
+
{"current_steps": 36975, "total_steps": 38480, "loss": 0.2688, "lr": 0.0001397591481708238, "epoch": 19.217775467775468, "percentage": 96.09, "elapsed_time": "1:27:05", "remaining_time": "0:03:32", "throughput": 1349.24, "total_tokens": 7050160}
|
| 7415 |
+
{"current_steps": 36980, "total_steps": 38480, "loss": 0.2518, "lr": 0.00013883410495222902, "epoch": 19.22037422037422, "percentage": 96.1, "elapsed_time": "1:27:05", "remaining_time": "0:03:31", "throughput": 1349.25, "total_tokens": 7051120}
|
| 7416 |
+
{"current_steps": 36985, "total_steps": 38480, "loss": 0.2568, "lr": 0.000137912119029599, "epoch": 19.222972972972972, "percentage": 96.11, "elapsed_time": "1:27:06", "remaining_time": "0:03:31", "throughput": 1349.25, "total_tokens": 7052016}
|
| 7417 |
+
{"current_steps": 36990, "total_steps": 38480, "loss": 0.2512, "lr": 0.00013699319059261026, "epoch": 19.225571725571726, "percentage": 96.13, "elapsed_time": "1:27:07", "remaining_time": "0:03:30", "throughput": 1349.26, "total_tokens": 7052944}
|
| 7418 |
+
{"current_steps": 36995, "total_steps": 38480, "loss": 0.2437, "lr": 0.00013607731983030668, "epoch": 19.22817047817048, "percentage": 96.14, "elapsed_time": "1:27:07", "remaining_time": "0:03:29", "throughput": 1349.27, "total_tokens": 7053904}
|
| 7419 |
+
{"current_steps": 37000, "total_steps": 38480, "loss": 0.2566, "lr": 0.00013516450693110583, "epoch": 19.23076923076923, "percentage": 96.15, "elapsed_time": "1:27:08", "remaining_time": "0:03:29", "throughput": 1349.29, "total_tokens": 7054928}
|
| 7420 |
+
{"current_steps": 37005, "total_steps": 38480, "loss": 0.2559, "lr": 0.00013425475208279247, "epoch": 19.233367983367984, "percentage": 96.17, "elapsed_time": "1:27:09", "remaining_time": "0:03:28", "throughput": 1349.3, "total_tokens": 7055856}
|
| 7421 |
+
{"current_steps": 37010, "total_steps": 38480, "loss": 0.2401, "lr": 0.0001333480554725286, "epoch": 19.235966735966738, "percentage": 96.18, "elapsed_time": "1:27:09", "remaining_time": "0:03:27", "throughput": 1349.29, "total_tokens": 7056720}
|
| 7422 |
+
{"current_steps": 37015, "total_steps": 38480, "loss": 0.2863, "lr": 0.00013244441728684164, "epoch": 19.238565488565488, "percentage": 96.19, "elapsed_time": "1:27:10", "remaining_time": "0:03:27", "throughput": 1349.3, "total_tokens": 7057712}
|
| 7423 |
+
{"current_steps": 37020, "total_steps": 38480, "loss": 0.2626, "lr": 0.00013154383771163123, "epoch": 19.241164241164242, "percentage": 96.21, "elapsed_time": "1:27:11", "remaining_time": "0:03:26", "throughput": 1349.3, "total_tokens": 7058608}
|
| 7424 |
+
{"current_steps": 37025, "total_steps": 38480, "loss": 0.211, "lr": 0.0001306463169321692, "epoch": 19.243762993762992, "percentage": 96.22, "elapsed_time": "1:27:11", "remaining_time": "0:03:25", "throughput": 1349.31, "total_tokens": 7059568}
|
| 7425 |
+
{"current_steps": 37030, "total_steps": 38480, "loss": 0.3001, "lr": 0.00012975185513309617, "epoch": 19.246361746361746, "percentage": 96.23, "elapsed_time": "1:27:12", "remaining_time": "0:03:24", "throughput": 1349.34, "total_tokens": 7060624}
|
| 7426 |
+
{"current_steps": 37035, "total_steps": 38480, "loss": 0.251, "lr": 0.00012886045249842326, "epoch": 19.2489604989605, "percentage": 96.24, "elapsed_time": "1:27:13", "remaining_time": "0:03:24", "throughput": 1349.36, "total_tokens": 7061648}
|
| 7427 |
+
{"current_steps": 37040, "total_steps": 38480, "loss": 0.227, "lr": 0.0001279721092115321, "epoch": 19.25155925155925, "percentage": 96.26, "elapsed_time": "1:27:14", "remaining_time": "0:03:23", "throughput": 1349.38, "total_tokens": 7062640}
|
| 7428 |
+
{"current_steps": 37045, "total_steps": 38480, "loss": 0.2686, "lr": 0.0001270868254551799, "epoch": 19.254158004158004, "percentage": 96.27, "elapsed_time": "1:27:14", "remaining_time": "0:03:22", "throughput": 1349.39, "total_tokens": 7063600}
|
| 7429 |
+
{"current_steps": 37050, "total_steps": 38480, "loss": 0.3071, "lr": 0.00012620460141148925, "epoch": 19.256756756756758, "percentage": 96.28, "elapsed_time": "1:27:15", "remaining_time": "0:03:22", "throughput": 1349.38, "total_tokens": 7064464}
|
| 7430 |
+
{"current_steps": 37055, "total_steps": 38480, "loss": 0.2358, "lr": 0.00012532543726195334, "epoch": 19.259355509355508, "percentage": 96.3, "elapsed_time": "1:27:16", "remaining_time": "0:03:21", "throughput": 1349.37, "total_tokens": 7065328}
|
| 7431 |
+
{"current_steps": 37060, "total_steps": 38480, "loss": 0.2079, "lr": 0.00012444933318743912, "epoch": 19.261954261954262, "percentage": 96.31, "elapsed_time": "1:27:16", "remaining_time": "0:03:20", "throughput": 1349.38, "total_tokens": 7066256}
|
| 7432 |
+
{"current_steps": 37065, "total_steps": 38480, "loss": 0.2949, "lr": 0.00012357628936818075, "epoch": 19.264553014553016, "percentage": 96.32, "elapsed_time": "1:27:17", "remaining_time": "0:03:19", "throughput": 1349.38, "total_tokens": 7067184}
|
| 7433 |
+
{"current_steps": 37070, "total_steps": 38480, "loss": 0.2605, "lr": 0.00012270630598378295, "epoch": 19.267151767151766, "percentage": 96.34, "elapsed_time": "1:27:18", "remaining_time": "0:03:19", "throughput": 1349.38, "total_tokens": 7068048}
|
| 7434 |
+
{"current_steps": 37075, "total_steps": 38480, "loss": 0.3108, "lr": 0.00012183938321322418, "epoch": 19.26975051975052, "percentage": 96.35, "elapsed_time": "1:27:18", "remaining_time": "0:03:18", "throughput": 1349.38, "total_tokens": 7068976}
|
| 7435 |
+
{"current_steps": 37080, "total_steps": 38480, "loss": 0.2915, "lr": 0.00012097552123485011, "epoch": 19.272349272349274, "percentage": 96.36, "elapsed_time": "1:27:19", "remaining_time": "0:03:17", "throughput": 1349.38, "total_tokens": 7069872}
|
| 7436 |
+
{"current_steps": 37085, "total_steps": 38480, "loss": 0.2383, "lr": 0.00012011472022638025, "epoch": 19.274948024948024, "percentage": 96.37, "elapsed_time": "1:27:20", "remaining_time": "0:03:17", "throughput": 1349.41, "total_tokens": 7070928}
|
| 7437 |
+
{"current_steps": 37090, "total_steps": 38480, "loss": 0.2807, "lr": 0.00011925698036489796, "epoch": 19.277546777546778, "percentage": 96.39, "elapsed_time": "1:27:20", "remaining_time": "0:03:16", "throughput": 1349.42, "total_tokens": 7071888}
|
| 7438 |
+
{"current_steps": 37095, "total_steps": 38480, "loss": 0.2529, "lr": 0.00011840230182686206, "epoch": 19.28014553014553, "percentage": 96.4, "elapsed_time": "1:27:21", "remaining_time": "0:03:15", "throughput": 1349.44, "total_tokens": 7072880}
|
| 7439 |
+
{"current_steps": 37100, "total_steps": 38480, "loss": 0.269, "lr": 0.00011755068478810193, "epoch": 19.282744282744282, "percentage": 96.41, "elapsed_time": "1:27:22", "remaining_time": "0:03:14", "throughput": 1349.44, "total_tokens": 7073808}
|
| 7440 |
+
{"current_steps": 37105, "total_steps": 38480, "loss": 0.2735, "lr": 0.00011670212942381241, "epoch": 19.285343035343036, "percentage": 96.43, "elapsed_time": "1:27:22", "remaining_time": "0:03:14", "throughput": 1349.43, "total_tokens": 7074768}
|
| 7441 |
+
{"current_steps": 37110, "total_steps": 38480, "loss": 0.2817, "lr": 0.00011585663590856387, "epoch": 19.287941787941786, "percentage": 96.44, "elapsed_time": "1:27:23", "remaining_time": "0:03:13", "throughput": 1349.44, "total_tokens": 7075760}
|
| 7442 |
+
{"current_steps": 37115, "total_steps": 38480, "loss": 0.258, "lr": 0.0001150142044162955, "epoch": 19.29054054054054, "percentage": 96.45, "elapsed_time": "1:27:24", "remaining_time": "0:03:12", "throughput": 1349.46, "total_tokens": 7076720}
|
| 7443 |
+
{"current_steps": 37120, "total_steps": 38480, "loss": 0.2949, "lr": 0.00011417483512031368, "epoch": 19.293139293139294, "percentage": 96.47, "elapsed_time": "1:27:24", "remaining_time": "0:03:12", "throughput": 1349.47, "total_tokens": 7077712}
|
| 7444 |
+
{"current_steps": 37125, "total_steps": 38480, "loss": 0.2451, "lr": 0.00011333852819329693, "epoch": 19.295738045738045, "percentage": 96.48, "elapsed_time": "1:27:25", "remaining_time": "0:03:11", "throughput": 1349.47, "total_tokens": 7078576}
|
| 7445 |
+
{"current_steps": 37130, "total_steps": 38480, "loss": 0.2672, "lr": 0.00011250528380729263, "epoch": 19.2983367983368, "percentage": 96.49, "elapsed_time": "1:27:26", "remaining_time": "0:03:10", "throughput": 1349.47, "total_tokens": 7079504}
|
| 7446 |
+
{"current_steps": 37135, "total_steps": 38480, "loss": 0.2818, "lr": 0.00011167510213372034, "epoch": 19.300935550935552, "percentage": 96.5, "elapsed_time": "1:27:26", "remaining_time": "0:03:10", "throughput": 1349.48, "total_tokens": 7080464}
|
| 7447 |
+
{"current_steps": 37140, "total_steps": 38480, "loss": 0.246, "lr": 0.00011084798334336842, "epoch": 19.303534303534303, "percentage": 96.52, "elapsed_time": "1:27:27", "remaining_time": "0:03:09", "throughput": 1349.49, "total_tokens": 7081424}
|
| 7448 |
+
{"current_steps": 37145, "total_steps": 38480, "loss": 0.2303, "lr": 0.00011002392760639412, "epoch": 19.306133056133056, "percentage": 96.53, "elapsed_time": "1:27:28", "remaining_time": "0:03:08", "throughput": 1349.5, "total_tokens": 7082384}
|
| 7449 |
+
{"current_steps": 37150, "total_steps": 38480, "loss": 0.1996, "lr": 0.00010920293509232515, "epoch": 19.30873180873181, "percentage": 96.54, "elapsed_time": "1:27:28", "remaining_time": "0:03:07", "throughput": 1349.5, "total_tokens": 7083280}
|
| 7450 |
+
{"current_steps": 37155, "total_steps": 38480, "loss": 0.258, "lr": 0.0001083850059700614, "epoch": 19.31133056133056, "percentage": 96.56, "elapsed_time": "1:27:29", "remaining_time": "0:03:07", "throughput": 1349.52, "total_tokens": 7084272}
|
| 7451 |
+
{"current_steps": 37160, "total_steps": 38480, "loss": 0.2689, "lr": 0.00010757014040786661, "epoch": 19.313929313929314, "percentage": 96.57, "elapsed_time": "1:27:30", "remaining_time": "0:03:06", "throughput": 1349.53, "total_tokens": 7085264}
|
| 7452 |
+
{"current_steps": 37165, "total_steps": 38480, "loss": 0.2135, "lr": 0.00010675833857338001, "epoch": 19.316528066528065, "percentage": 96.58, "elapsed_time": "1:27:30", "remaining_time": "0:03:05", "throughput": 1349.54, "total_tokens": 7086192}
|
| 7453 |
+
{"current_steps": 37170, "total_steps": 38480, "loss": 0.2811, "lr": 0.00010594960063360969, "epoch": 19.31912681912682, "percentage": 96.6, "elapsed_time": "1:27:31", "remaining_time": "0:03:05", "throughput": 1349.55, "total_tokens": 7087152}
|
| 7454 |
+
{"current_steps": 37175, "total_steps": 38480, "loss": 0.202, "lr": 0.00010514392675493089, "epoch": 19.321725571725572, "percentage": 96.61, "elapsed_time": "1:27:32", "remaining_time": "0:03:04", "throughput": 1349.57, "total_tokens": 7088176}
|
| 7455 |
+
{"current_steps": 37180, "total_steps": 38480, "loss": 0.2619, "lr": 0.00010434131710309101, "epoch": 19.324324324324323, "percentage": 96.62, "elapsed_time": "1:27:32", "remaining_time": "0:03:03", "throughput": 1349.58, "total_tokens": 7089136}
|
| 7456 |
+
{"current_steps": 37185, "total_steps": 38480, "loss": 0.2785, "lr": 0.00010354177184320467, "epoch": 19.326923076923077, "percentage": 96.63, "elapsed_time": "1:27:33", "remaining_time": "0:03:02", "throughput": 1349.59, "total_tokens": 7090096}
|
| 7457 |
+
{"current_steps": 37190, "total_steps": 38480, "loss": 0.3101, "lr": 0.0001027452911397586, "epoch": 19.32952182952183, "percentage": 96.65, "elapsed_time": "1:27:34", "remaining_time": "0:03:02", "throughput": 1349.6, "total_tokens": 7091024}
|
| 7458 |
+
{"current_steps": 37195, "total_steps": 38480, "loss": 0.2658, "lr": 0.00010195187515660674, "epoch": 19.33212058212058, "percentage": 96.66, "elapsed_time": "1:27:34", "remaining_time": "0:03:01", "throughput": 1349.62, "total_tokens": 7092080}
|
| 7459 |
+
{"current_steps": 37200, "total_steps": 38480, "loss": 0.2473, "lr": 0.00010116152405697353, "epoch": 19.334719334719335, "percentage": 96.67, "elapsed_time": "1:27:35", "remaining_time": "0:03:00", "throughput": 1349.63, "total_tokens": 7093040}
|
| 7460 |
+
{"current_steps": 37205, "total_steps": 38480, "loss": 0.2304, "lr": 0.00010037423800345557, "epoch": 19.33731808731809, "percentage": 96.69, "elapsed_time": "1:27:36", "remaining_time": "0:03:00", "throughput": 1349.65, "total_tokens": 7094032}
|
| 7461 |
+
{"current_steps": 37210, "total_steps": 38480, "loss": 0.2537, "lr": 9.959001715801329e-05, "epoch": 19.33991683991684, "percentage": 96.7, "elapsed_time": "1:27:36", "remaining_time": "0:02:59", "throughput": 1349.66, "total_tokens": 7094992}
|
| 7462 |
+
{"current_steps": 37215, "total_steps": 38480, "loss": 0.2231, "lr": 9.8808861681981e-05, "epoch": 19.342515592515593, "percentage": 96.71, "elapsed_time": "1:27:37", "remaining_time": "0:02:58", "throughput": 1349.66, "total_tokens": 7095888}
|
| 7463 |
+
{"current_steps": 37220, "total_steps": 38480, "loss": 0.1998, "lr": 9.803077173606012e-05, "epoch": 19.345114345114347, "percentage": 96.73, "elapsed_time": "1:27:38", "remaining_time": "0:02:58", "throughput": 1349.67, "total_tokens": 7096848}
|
| 7464 |
+
{"current_steps": 37225, "total_steps": 38480, "loss": 0.2519, "lr": 9.725574748032428e-05, "epoch": 19.347713097713097, "percentage": 96.74, "elapsed_time": "1:27:38", "remaining_time": "0:02:57", "throughput": 1349.68, "total_tokens": 7097776}
|
| 7465 |
+
{"current_steps": 37230, "total_steps": 38480, "loss": 0.2828, "lr": 9.648378907421262e-05, "epoch": 19.35031185031185, "percentage": 96.75, "elapsed_time": "1:27:39", "remaining_time": "0:02:56", "throughput": 1349.68, "total_tokens": 7098704}
|
| 7466 |
+
{"current_steps": 37235, "total_steps": 38480, "loss": 0.2811, "lr": 9.571489667653477e-05, "epoch": 19.352910602910605, "percentage": 96.76, "elapsed_time": "1:27:40", "remaining_time": "0:02:55", "throughput": 1349.68, "total_tokens": 7099600}
|
| 7467 |
+
{"current_steps": 37240, "total_steps": 38480, "loss": 0.2265, "lr": 9.494907044547251e-05, "epoch": 19.355509355509355, "percentage": 96.78, "elapsed_time": "1:27:40", "remaining_time": "0:02:55", "throughput": 1349.7, "total_tokens": 7100592}
|
| 7468 |
+
{"current_steps": 37245, "total_steps": 38480, "loss": 0.2571, "lr": 9.418631053857318e-05, "epoch": 19.35810810810811, "percentage": 96.79, "elapsed_time": "1:27:41", "remaining_time": "0:02:54", "throughput": 1349.7, "total_tokens": 7101520}
|
| 7469 |
+
{"current_steps": 37250, "total_steps": 38480, "loss": 0.2622, "lr": 9.342661711275457e-05, "epoch": 19.36070686070686, "percentage": 96.8, "elapsed_time": "1:27:42", "remaining_time": "0:02:53", "throughput": 1349.71, "total_tokens": 7102480}
|
| 7470 |
+
{"current_steps": 37255, "total_steps": 38480, "loss": 0.2637, "lr": 9.266999032430333e-05, "epoch": 19.363305613305613, "percentage": 96.82, "elapsed_time": "1:27:42", "remaining_time": "0:02:53", "throughput": 1349.72, "total_tokens": 7103408}
|
| 7471 |
+
{"current_steps": 37260, "total_steps": 38480, "loss": 0.2487, "lr": 9.19164303288783e-05, "epoch": 19.365904365904367, "percentage": 96.83, "elapsed_time": "1:27:43", "remaining_time": "0:02:52", "throughput": 1349.73, "total_tokens": 7104368}
|
| 7472 |
+
{"current_steps": 37265, "total_steps": 38480, "loss": 0.2951, "lr": 9.116593728150213e-05, "epoch": 19.368503118503117, "percentage": 96.84, "elapsed_time": "1:27:44", "remaining_time": "0:02:51", "throughput": 1349.73, "total_tokens": 7105296}
|
| 7473 |
+
{"current_steps": 37270, "total_steps": 38480, "loss": 0.307, "lr": 9.041851133656963e-05, "epoch": 19.37110187110187, "percentage": 96.86, "elapsed_time": "1:27:44", "remaining_time": "0:02:50", "throughput": 1349.74, "total_tokens": 7106256}
|
| 7474 |
+
{"current_steps": 37275, "total_steps": 38480, "loss": 0.246, "lr": 8.967415264784283e-05, "epoch": 19.373700623700625, "percentage": 96.87, "elapsed_time": "1:27:45", "remaining_time": "0:02:50", "throughput": 1349.75, "total_tokens": 7107216}
|
| 7475 |
+
{"current_steps": 37280, "total_steps": 38480, "loss": 0.2792, "lr": 8.89328613684559e-05, "epoch": 19.376299376299375, "percentage": 96.88, "elapsed_time": "1:27:46", "remaining_time": "0:02:49", "throughput": 1349.76, "total_tokens": 7108176}
|
| 7476 |
+
{"current_steps": 37285, "total_steps": 38480, "loss": 0.2689, "lr": 8.819463765091184e-05, "epoch": 19.37889812889813, "percentage": 96.89, "elapsed_time": "1:27:46", "remaining_time": "0:02:48", "throughput": 1349.79, "total_tokens": 7109232}
|
| 7477 |
+
{"current_steps": 37290, "total_steps": 38480, "loss": 0.2626, "lr": 8.745948164707584e-05, "epoch": 19.381496881496883, "percentage": 96.91, "elapsed_time": "1:27:47", "remaining_time": "0:02:48", "throughput": 1349.8, "total_tokens": 7110224}
|
| 7478 |
+
{"current_steps": 37295, "total_steps": 38480, "loss": 0.2776, "lr": 8.672739350819192e-05, "epoch": 19.384095634095633, "percentage": 96.92, "elapsed_time": "1:27:48", "remaining_time": "0:02:47", "throughput": 1349.82, "total_tokens": 7111184}
|
| 7479 |
+
{"current_steps": 37300, "total_steps": 38480, "loss": 0.2726, "lr": 8.59983733848646e-05, "epoch": 19.386694386694387, "percentage": 96.93, "elapsed_time": "1:27:48", "remaining_time": "0:02:46", "throughput": 1349.81, "total_tokens": 7112080}
|
| 7480 |
+
{"current_steps": 37305, "total_steps": 38480, "loss": 0.3026, "lr": 8.527242142707225e-05, "epoch": 19.38929313929314, "percentage": 96.95, "elapsed_time": "1:27:49", "remaining_time": "0:02:45", "throughput": 1349.81, "total_tokens": 7112976}
|
| 7481 |
+
{"current_steps": 37310, "total_steps": 38480, "loss": 0.3218, "lr": 8.454953778416207e-05, "epoch": 19.39189189189189, "percentage": 96.96, "elapsed_time": "1:27:50", "remaining_time": "0:02:45", "throughput": 1349.84, "total_tokens": 7114000}
|
| 7482 |
+
{"current_steps": 37315, "total_steps": 38480, "loss": 0.2577, "lr": 8.382972260484511e-05, "epoch": 19.394490644490645, "percentage": 96.97, "elapsed_time": "1:27:50", "remaining_time": "0:02:44", "throughput": 1349.84, "total_tokens": 7114928}
|
| 7483 |
+
{"current_steps": 37320, "total_steps": 38480, "loss": 0.2734, "lr": 8.311297603720624e-05, "epoch": 19.397089397089395, "percentage": 96.99, "elapsed_time": "1:27:51", "remaining_time": "0:02:43", "throughput": 1349.86, "total_tokens": 7115952}
|
| 7484 |
+
{"current_steps": 37325, "total_steps": 38480, "loss": 0.2264, "lr": 8.239929822869751e-05, "epoch": 19.39968814968815, "percentage": 97.0, "elapsed_time": "1:27:52", "remaining_time": "0:02:43", "throughput": 1349.87, "total_tokens": 7116912}
|
| 7485 |
+
{"current_steps": 37330, "total_steps": 38480, "loss": 0.3141, "lr": 8.168868932613814e-05, "epoch": 19.402286902286903, "percentage": 97.01, "elapsed_time": "1:27:52", "remaining_time": "0:02:42", "throughput": 1349.88, "total_tokens": 7117872}
|
| 7486 |
+
{"current_steps": 37335, "total_steps": 38480, "loss": 0.3166, "lr": 8.098114947571955e-05, "epoch": 19.404885654885653, "percentage": 97.02, "elapsed_time": "1:27:53", "remaining_time": "0:02:41", "throughput": 1349.9, "total_tokens": 7118896}
|
| 7487 |
+
{"current_steps": 37340, "total_steps": 38480, "loss": 0.2609, "lr": 8.02766788229986e-05, "epoch": 19.407484407484407, "percentage": 97.04, "elapsed_time": "1:27:54", "remaining_time": "0:02:41", "throughput": 1349.91, "total_tokens": 7119856}
|
| 7488 |
+
{"current_steps": 37345, "total_steps": 38480, "loss": 0.3351, "lr": 7.957527751289939e-05, "epoch": 19.41008316008316, "percentage": 97.05, "elapsed_time": "1:27:54", "remaining_time": "0:02:40", "throughput": 1349.93, "total_tokens": 7120880}
|
| 7489 |
+
{"current_steps": 37350, "total_steps": 38480, "loss": 0.2334, "lr": 7.887694568972147e-05, "epoch": 19.41268191268191, "percentage": 97.06, "elapsed_time": "1:27:55", "remaining_time": "0:02:39", "throughput": 1349.95, "total_tokens": 7121872}
|
| 7490 |
+
{"current_steps": 37355, "total_steps": 38480, "loss": 0.2386, "lr": 7.818168349712328e-05, "epoch": 19.415280665280665, "percentage": 97.08, "elapsed_time": "1:27:56", "remaining_time": "0:02:38", "throughput": 1349.96, "total_tokens": 7122800}
|
| 7491 |
+
{"current_steps": 37360, "total_steps": 38480, "loss": 0.2231, "lr": 7.748949107814041e-05, "epoch": 19.41787941787942, "percentage": 97.09, "elapsed_time": "1:27:56", "remaining_time": "0:02:38", "throughput": 1349.97, "total_tokens": 7123792}
|
| 7492 |
+
{"current_steps": 37365, "total_steps": 38480, "loss": 0.2146, "lr": 7.680036857517225e-05, "epoch": 19.42047817047817, "percentage": 97.1, "elapsed_time": "1:27:57", "remaining_time": "0:02:37", "throughput": 1349.98, "total_tokens": 7124720}
|
| 7493 |
+
{"current_steps": 37370, "total_steps": 38480, "loss": 0.2724, "lr": 7.61143161299871e-05, "epoch": 19.423076923076923, "percentage": 97.12, "elapsed_time": "1:27:58", "remaining_time": "0:02:36", "throughput": 1349.98, "total_tokens": 7125616}
|
| 7494 |
+
{"current_steps": 37375, "total_steps": 38480, "loss": 0.2603, "lr": 7.543133388372369e-05, "epoch": 19.425675675675677, "percentage": 97.13, "elapsed_time": "1:27:58", "remaining_time": "0:02:36", "throughput": 1349.98, "total_tokens": 7126544}
|
| 7495 |
+
{"current_steps": 37380, "total_steps": 38480, "loss": 0.2518, "lr": 7.475142197688632e-05, "epoch": 19.428274428274428, "percentage": 97.14, "elapsed_time": "1:27:59", "remaining_time": "0:02:35", "throughput": 1350.0, "total_tokens": 7127568}
|
| 7496 |
+
{"current_steps": 37385, "total_steps": 38480, "loss": 0.2429, "lr": 7.407458054934979e-05, "epoch": 19.43087318087318, "percentage": 97.15, "elapsed_time": "1:28:00", "remaining_time": "0:02:34", "throughput": 1350.0, "total_tokens": 7128464}
|
| 7497 |
+
{"current_steps": 37390, "total_steps": 38480, "loss": 0.2656, "lr": 7.340080974035767e-05, "epoch": 19.433471933471935, "percentage": 97.17, "elapsed_time": "1:28:01", "remaining_time": "0:02:33", "throughput": 1350.01, "total_tokens": 7129392}
|
| 7498 |
+
{"current_steps": 37395, "total_steps": 38480, "loss": 0.3002, "lr": 7.273010968851745e-05, "epoch": 19.436070686070686, "percentage": 97.18, "elapsed_time": "1:28:01", "remaining_time": "0:02:33", "throughput": 1350.02, "total_tokens": 7130352}
|
| 7499 |
+
{"current_steps": 37400, "total_steps": 38480, "loss": 0.2699, "lr": 7.206248053181042e-05, "epoch": 19.43866943866944, "percentage": 97.19, "elapsed_time": "1:28:02", "remaining_time": "0:02:32", "throughput": 1350.03, "total_tokens": 7131344}
|
| 7500 |
+
{"current_steps": 37405, "total_steps": 38480, "loss": 0.2977, "lr": 7.139792240758503e-05, "epoch": 19.44126819126819, "percentage": 97.21, "elapsed_time": "1:28:03", "remaining_time": "0:02:31", "throughput": 1350.04, "total_tokens": 7132272}
|
| 7501 |
+
{"current_steps": 37410, "total_steps": 38480, "loss": 0.2365, "lr": 7.07364354525536e-05, "epoch": 19.443866943866944, "percentage": 97.22, "elapsed_time": "1:28:03", "remaining_time": "0:02:31", "throughput": 1350.07, "total_tokens": 7133392}
|
| 7502 |
+
{"current_steps": 37415, "total_steps": 38480, "loss": 0.2171, "lr": 7.00780198028006e-05, "epoch": 19.446465696465697, "percentage": 97.23, "elapsed_time": "1:28:04", "remaining_time": "0:02:30", "throughput": 1350.08, "total_tokens": 7134352}
|
| 7503 |
+
{"current_steps": 37420, "total_steps": 38480, "loss": 0.2176, "lr": 6.9422675593781e-05, "epoch": 19.449064449064448, "percentage": 97.25, "elapsed_time": "1:28:05", "remaining_time": "0:02:29", "throughput": 1350.1, "total_tokens": 7135344}
|
| 7504 |
+
{"current_steps": 37425, "total_steps": 38480, "loss": 0.2556, "lr": 6.877040296031034e-05, "epoch": 19.4516632016632, "percentage": 97.26, "elapsed_time": "1:28:05", "remaining_time": "0:02:29", "throughput": 1350.11, "total_tokens": 7136304}
|
| 7505 |
+
{"current_steps": 37430, "total_steps": 38480, "loss": 0.261, "lr": 6.812120203657956e-05, "epoch": 19.454261954261955, "percentage": 97.27, "elapsed_time": "1:28:06", "remaining_time": "0:02:28", "throughput": 1350.11, "total_tokens": 7137200}
|
| 7506 |
+
{"current_steps": 37435, "total_steps": 38480, "loss": 0.2702, "lr": 6.747507295614186e-05, "epoch": 19.456860706860706, "percentage": 97.28, "elapsed_time": "1:28:07", "remaining_time": "0:02:27", "throughput": 1350.12, "total_tokens": 7138128}
|
| 7507 |
+
{"current_steps": 37440, "total_steps": 38480, "loss": 0.2143, "lr": 6.683201585192588e-05, "epoch": 19.45945945945946, "percentage": 97.3, "elapsed_time": "1:28:07", "remaining_time": "0:02:26", "throughput": 1350.11, "total_tokens": 7139024}
|
| 7508 |
+
{"current_steps": 37445, "total_steps": 38480, "loss": 0.3026, "lr": 6.619203085621916e-05, "epoch": 19.462058212058214, "percentage": 97.31, "elapsed_time": "1:28:08", "remaining_time": "0:02:26", "throughput": 1350.13, "total_tokens": 7140016}
|
| 7509 |
+
{"current_steps": 37450, "total_steps": 38480, "loss": 0.2759, "lr": 6.55551181006847e-05, "epoch": 19.464656964656964, "percentage": 97.32, "elapsed_time": "1:28:09", "remaining_time": "0:02:25", "throughput": 1350.13, "total_tokens": 7140912}
|
| 7510 |
+
{"current_steps": 37455, "total_steps": 38480, "loss": 0.2425, "lr": 6.492127771634937e-05, "epoch": 19.467255717255718, "percentage": 97.34, "elapsed_time": "1:28:09", "remaining_time": "0:02:24", "throughput": 1350.14, "total_tokens": 7141872}
|
| 7511 |
+
{"current_steps": 37460, "total_steps": 38480, "loss": 0.2821, "lr": 6.42905098336105e-05, "epoch": 19.46985446985447, "percentage": 97.35, "elapsed_time": "1:28:10", "remaining_time": "0:02:24", "throughput": 1350.15, "total_tokens": 7142864}
|
| 7512 |
+
{"current_steps": 37465, "total_steps": 38480, "loss": 0.2831, "lr": 6.3662814582231e-05, "epoch": 19.472453222453222, "percentage": 97.36, "elapsed_time": "1:28:11", "remaining_time": "0:02:23", "throughput": 1350.15, "total_tokens": 7143760}
|
| 7513 |
+
{"current_steps": 37470, "total_steps": 38480, "loss": 0.2433, "lr": 6.303819209134254e-05, "epoch": 19.475051975051976, "percentage": 97.38, "elapsed_time": "1:28:11", "remaining_time": "0:02:22", "throughput": 1350.16, "total_tokens": 7144688}
|
| 7514 |
+
{"current_steps": 37475, "total_steps": 38480, "loss": 0.2286, "lr": 6.241664248944567e-05, "epoch": 19.477650727650726, "percentage": 97.39, "elapsed_time": "1:28:12", "remaining_time": "0:02:21", "throughput": 1350.16, "total_tokens": 7145584}
|
| 7515 |
+
{"current_steps": 37480, "total_steps": 38480, "loss": 0.229, "lr": 6.179816590440811e-05, "epoch": 19.48024948024948, "percentage": 97.4, "elapsed_time": "1:28:13", "remaining_time": "0:02:21", "throughput": 1350.16, "total_tokens": 7146480}
|
| 7516 |
+
{"current_steps": 37485, "total_steps": 38480, "loss": 0.2731, "lr": 6.118276246346477e-05, "epoch": 19.482848232848234, "percentage": 97.41, "elapsed_time": "1:28:13", "remaining_time": "0:02:20", "throughput": 1350.17, "total_tokens": 7147472}
|
| 7517 |
+
{"current_steps": 37490, "total_steps": 38480, "loss": 0.2405, "lr": 6.057043229321768e-05, "epoch": 19.485446985446984, "percentage": 97.43, "elapsed_time": "1:28:14", "remaining_time": "0:02:19", "throughput": 1350.18, "total_tokens": 7148400}
|
| 7518 |
+
{"current_steps": 37495, "total_steps": 38480, "loss": 0.2714, "lr": 5.9961175519639415e-05, "epoch": 19.488045738045738, "percentage": 97.44, "elapsed_time": "1:28:15", "remaining_time": "0:02:19", "throughput": 1350.18, "total_tokens": 7149296}
|
| 7519 |
+
{"current_steps": 37500, "total_steps": 38480, "loss": 0.2307, "lr": 5.935499226806806e-05, "epoch": 19.490644490644492, "percentage": 97.45, "elapsed_time": "1:28:15", "remaining_time": "0:02:18", "throughput": 1350.19, "total_tokens": 7150288}
|
| 7520 |
+
{"current_steps": 37505, "total_steps": 38480, "loss": 0.2159, "lr": 5.8751882663210514e-05, "epoch": 19.493243243243242, "percentage": 97.47, "elapsed_time": "1:28:16", "remaining_time": "0:02:17", "throughput": 1350.19, "total_tokens": 7151184}
|
| 7521 |
+
{"current_steps": 37510, "total_steps": 38480, "loss": 0.3098, "lr": 5.815184682913921e-05, "epoch": 19.495841995841996, "percentage": 97.48, "elapsed_time": "1:28:17", "remaining_time": "0:02:16", "throughput": 1350.21, "total_tokens": 7152208}
|
| 7522 |
+
{"current_steps": 37515, "total_steps": 38480, "loss": 0.2654, "lr": 5.755488488929705e-05, "epoch": 19.49844074844075, "percentage": 97.49, "elapsed_time": "1:28:17", "remaining_time": "0:02:16", "throughput": 1350.23, "total_tokens": 7153232}
|
| 7523 |
+
{"current_steps": 37520, "total_steps": 38480, "loss": 0.2585, "lr": 5.696099696649248e-05, "epoch": 19.5010395010395, "percentage": 97.51, "elapsed_time": "1:28:18", "remaining_time": "0:02:15", "throughput": 1350.25, "total_tokens": 7154192}
|
| 7524 |
+
{"current_steps": 37525, "total_steps": 38480, "loss": 0.3062, "lr": 5.637018318290443e-05, "epoch": 19.503638253638254, "percentage": 97.52, "elapsed_time": "1:28:19", "remaining_time": "0:02:14", "throughput": 1350.26, "total_tokens": 7155216}
|
| 7525 |
+
{"current_steps": 37530, "total_steps": 38480, "loss": 0.2368, "lr": 5.578244366007567e-05, "epoch": 19.506237006237008, "percentage": 97.53, "elapsed_time": "1:28:19", "remaining_time": "0:02:14", "throughput": 1350.27, "total_tokens": 7156144}
|
| 7526 |
+
{"current_steps": 37535, "total_steps": 38480, "loss": 0.244, "lr": 5.5197778518917823e-05, "epoch": 19.508835758835758, "percentage": 97.54, "elapsed_time": "1:28:20", "remaining_time": "0:02:13", "throughput": 1350.27, "total_tokens": 7157072}
|
| 7527 |
+
{"current_steps": 37540, "total_steps": 38480, "loss": 0.3044, "lr": 5.4616187879709674e-05, "epoch": 19.511434511434512, "percentage": 97.56, "elapsed_time": "1:28:21", "remaining_time": "0:02:12", "throughput": 1350.28, "total_tokens": 7158032}
|
| 7528 |
+
{"current_steps": 37545, "total_steps": 38480, "loss": 0.252, "lr": 5.403767186210217e-05, "epoch": 19.514033264033262, "percentage": 97.57, "elapsed_time": "1:28:21", "remaining_time": "0:02:12", "throughput": 1350.29, "total_tokens": 7158992}
|
| 7529 |
+
{"current_steps": 37550, "total_steps": 38480, "loss": 0.2763, "lr": 5.346223058510679e-05, "epoch": 19.516632016632016, "percentage": 97.58, "elapsed_time": "1:28:22", "remaining_time": "0:02:11", "throughput": 1350.31, "total_tokens": 7159984}
|
| 7530 |
+
{"current_steps": 37555, "total_steps": 38480, "loss": 0.1997, "lr": 5.2889864167105504e-05, "epoch": 19.51923076923077, "percentage": 97.6, "elapsed_time": "1:28:23", "remaining_time": "0:02:10", "throughput": 1350.32, "total_tokens": 7160976}
|
| 7531 |
+
{"current_steps": 37560, "total_steps": 38480, "loss": 0.2706, "lr": 5.232057272584911e-05, "epoch": 19.52182952182952, "percentage": 97.61, "elapsed_time": "1:28:23", "remaining_time": "0:02:09", "throughput": 1350.35, "total_tokens": 7162032}
|
| 7532 |
+
{"current_steps": 37565, "total_steps": 38480, "loss": 0.2109, "lr": 5.1754356378453935e-05, "epoch": 19.524428274428274, "percentage": 97.62, "elapsed_time": "1:28:24", "remaining_time": "0:02:09", "throughput": 1350.35, "total_tokens": 7162928}
|
| 7533 |
+
{"current_steps": 37570, "total_steps": 38480, "loss": 0.2261, "lr": 5.119121524140346e-05, "epoch": 19.527027027027028, "percentage": 97.64, "elapsed_time": "1:28:25", "remaining_time": "0:02:08", "throughput": 1350.35, "total_tokens": 7163824}
|
| 7534 |
+
{"current_steps": 37575, "total_steps": 38480, "loss": 0.2552, "lr": 5.0631149430548356e-05, "epoch": 19.52962577962578, "percentage": 97.65, "elapsed_time": "1:28:25", "remaining_time": "0:02:07", "throughput": 1350.36, "total_tokens": 7164816}
|
| 7535 |
+
{"current_steps": 37580, "total_steps": 38480, "loss": 0.295, "lr": 5.007415906111145e-05, "epoch": 19.532224532224532, "percentage": 97.66, "elapsed_time": "1:28:26", "remaining_time": "0:02:07", "throughput": 1350.37, "total_tokens": 7165744}
|
| 7536 |
+
{"current_steps": 37585, "total_steps": 38480, "loss": 0.2913, "lr": 4.952024424767443e-05, "epoch": 19.534823284823286, "percentage": 97.67, "elapsed_time": "1:28:27", "remaining_time": "0:02:06", "throughput": 1350.37, "total_tokens": 7166672}
|
| 7537 |
+
{"current_steps": 37590, "total_steps": 38480, "loss": 0.2275, "lr": 4.8969405104192784e-05, "epoch": 19.537422037422036, "percentage": 97.69, "elapsed_time": "1:28:27", "remaining_time": "0:02:05", "throughput": 1350.38, "total_tokens": 7167600}
|
| 7538 |
+
{"current_steps": 37595, "total_steps": 38480, "loss": 0.2291, "lr": 4.842164174398755e-05, "epoch": 19.54002079002079, "percentage": 97.7, "elapsed_time": "1:28:28", "remaining_time": "0:02:04", "throughput": 1350.38, "total_tokens": 7168528}
|
| 7539 |
+
{"current_steps": 37600, "total_steps": 38480, "loss": 0.2739, "lr": 4.787695427974525e-05, "epoch": 19.542619542619544, "percentage": 97.71, "elapsed_time": "1:28:29", "remaining_time": "0:02:04", "throughput": 1350.39, "total_tokens": 7169456}
|
| 7540 |
+
{"current_steps": 37605, "total_steps": 38480, "loss": 0.2494, "lr": 4.733534282352458e-05, "epoch": 19.545218295218294, "percentage": 97.73, "elapsed_time": "1:28:29", "remaining_time": "0:02:03", "throughput": 1350.4, "total_tokens": 7170416}
|
| 7541 |
+
{"current_steps": 37610, "total_steps": 38480, "loss": 0.2825, "lr": 4.6796807486743083e-05, "epoch": 19.54781704781705, "percentage": 97.74, "elapsed_time": "1:28:30", "remaining_time": "0:02:02", "throughput": 1350.4, "total_tokens": 7171312}
|
| 7542 |
+
{"current_steps": 37615, "total_steps": 38480, "loss": 0.2937, "lr": 4.6261348380193796e-05, "epoch": 19.5504158004158, "percentage": 97.75, "elapsed_time": "1:28:31", "remaining_time": "0:02:02", "throughput": 1350.4, "total_tokens": 7172240}
|
| 7543 |
+
{"current_steps": 37620, "total_steps": 38480, "loss": 0.2651, "lr": 4.5728965614031944e-05, "epoch": 19.553014553014552, "percentage": 97.77, "elapsed_time": "1:28:31", "remaining_time": "0:02:01", "throughput": 1350.41, "total_tokens": 7173200}
|
| 7544 |
+
{"current_steps": 37625, "total_steps": 38480, "loss": 0.2208, "lr": 4.5199659297781576e-05, "epoch": 19.555613305613306, "percentage": 97.78, "elapsed_time": "1:28:32", "remaining_time": "0:02:00", "throughput": 1350.43, "total_tokens": 7174192}
|
| 7545 |
+
{"current_steps": 37630, "total_steps": 38480, "loss": 0.2577, "lr": 4.467342954033393e-05, "epoch": 19.558212058212057, "percentage": 97.79, "elapsed_time": "1:28:33", "remaining_time": "0:02:00", "throughput": 1350.44, "total_tokens": 7175152}
|
| 7546 |
+
{"current_steps": 37635, "total_steps": 38480, "loss": 0.2391, "lr": 4.4150276449945735e-05, "epoch": 19.56081081081081, "percentage": 97.8, "elapsed_time": "1:28:33", "remaining_time": "0:01:59", "throughput": 1350.46, "total_tokens": 7176144}
|
| 7547 |
+
{"current_steps": 37640, "total_steps": 38480, "loss": 0.2264, "lr": 4.3630200134244234e-05, "epoch": 19.563409563409564, "percentage": 97.82, "elapsed_time": "1:28:34", "remaining_time": "0:01:58", "throughput": 1350.46, "total_tokens": 7177072}
|
| 7548 |
+
{"current_steps": 37645, "total_steps": 38480, "loss": 0.2751, "lr": 4.311320070022051e-05, "epoch": 19.566008316008315, "percentage": 97.83, "elapsed_time": "1:28:35", "remaining_time": "0:01:57", "throughput": 1350.48, "total_tokens": 7178064}
|
| 7549 |
+
{"current_steps": 37650, "total_steps": 38480, "loss": 0.2544, "lr": 4.2599278254232816e-05, "epoch": 19.56860706860707, "percentage": 97.84, "elapsed_time": "1:28:35", "remaining_time": "0:01:57", "throughput": 1350.48, "total_tokens": 7178960}
|
| 7550 |
+
{"current_steps": 37655, "total_steps": 38480, "loss": 0.2702, "lr": 4.208843290200992e-05, "epoch": 19.571205821205822, "percentage": 97.86, "elapsed_time": "1:28:36", "remaining_time": "0:01:56", "throughput": 1350.48, "total_tokens": 7179856}
|
| 7551 |
+
{"current_steps": 37660, "total_steps": 38480, "loss": 0.2748, "lr": 4.158066474864108e-05, "epoch": 19.573804573804573, "percentage": 97.87, "elapsed_time": "1:28:37", "remaining_time": "0:01:55", "throughput": 1350.48, "total_tokens": 7180784}
|
| 7552 |
+
{"current_steps": 37665, "total_steps": 38480, "loss": 0.26, "lr": 4.107597389858775e-05, "epoch": 19.576403326403327, "percentage": 97.88, "elapsed_time": "1:28:37", "remaining_time": "0:01:55", "throughput": 1350.49, "total_tokens": 7181744}
|
| 7553 |
+
{"current_steps": 37670, "total_steps": 38480, "loss": 0.2776, "lr": 4.0574360455678524e-05, "epoch": 19.57900207900208, "percentage": 97.9, "elapsed_time": "1:28:38", "remaining_time": "0:01:54", "throughput": 1350.5, "total_tokens": 7182672}
|
| 7554 |
+
{"current_steps": 37675, "total_steps": 38480, "loss": 0.2967, "lr": 4.007582452310587e-05, "epoch": 19.58160083160083, "percentage": 97.91, "elapsed_time": "1:28:39", "remaining_time": "0:01:53", "throughput": 1350.5, "total_tokens": 7183600}
|
| 7555 |
+
{"current_steps": 37680, "total_steps": 38480, "loss": 0.264, "lr": 3.958036620343108e-05, "epoch": 19.584199584199585, "percentage": 97.92, "elapsed_time": "1:28:39", "remaining_time": "0:01:52", "throughput": 1350.53, "total_tokens": 7184656}
|
| 7556 |
+
{"current_steps": 37685, "total_steps": 38480, "loss": 0.2982, "lr": 3.908798559858095e-05, "epoch": 19.58679833679834, "percentage": 97.93, "elapsed_time": "1:28:40", "remaining_time": "0:01:52", "throughput": 1350.54, "total_tokens": 7185648}
|
| 7557 |
+
{"current_steps": 37690, "total_steps": 38480, "loss": 0.257, "lr": 3.859868280984946e-05, "epoch": 19.58939708939709, "percentage": 97.95, "elapsed_time": "1:28:41", "remaining_time": "0:01:51", "throughput": 1350.55, "total_tokens": 7186576}
|
| 7558 |
+
{"current_steps": 37695, "total_steps": 38480, "loss": 0.2648, "lr": 3.811245793789941e-05, "epoch": 19.591995841995843, "percentage": 97.96, "elapsed_time": "1:28:41", "remaining_time": "0:01:50", "throughput": 1350.55, "total_tokens": 7187504}
|
| 7559 |
+
{"current_steps": 37700, "total_steps": 38480, "loss": 0.2501, "lr": 3.762931108275746e-05, "epoch": 19.594594594594593, "percentage": 97.97, "elapsed_time": "1:28:42", "remaining_time": "0:01:50", "throughput": 1350.58, "total_tokens": 7188528}
|
| 7560 |
+
{"current_steps": 37705, "total_steps": 38480, "loss": 0.2892, "lr": 3.714924234382078e-05, "epoch": 19.597193347193347, "percentage": 97.99, "elapsed_time": "1:28:43", "remaining_time": "0:01:49", "throughput": 1350.59, "total_tokens": 7189552}
|
| 7561 |
+
{"current_steps": 37710, "total_steps": 38480, "loss": 0.2499, "lr": 3.667225181984701e-05, "epoch": 19.5997920997921, "percentage": 98.0, "elapsed_time": "1:28:43", "remaining_time": "0:01:48", "throughput": 1350.61, "total_tokens": 7190576}
|
| 7562 |
+
{"current_steps": 37715, "total_steps": 38480, "loss": 0.2607, "lr": 3.619833960896768e-05, "epoch": 19.60239085239085, "percentage": 98.01, "elapsed_time": "1:28:44", "remaining_time": "0:01:48", "throughput": 1350.62, "total_tokens": 7191536}
|
| 7563 |
+
{"current_steps": 37720, "total_steps": 38480, "loss": 0.299, "lr": 3.572750580867645e-05, "epoch": 19.604989604989605, "percentage": 98.02, "elapsed_time": "1:28:45", "remaining_time": "0:01:47", "throughput": 1350.64, "total_tokens": 7192528}
|
| 7564 |
+
{"current_steps": 37725, "total_steps": 38480, "loss": 0.2781, "lr": 3.525975051583585e-05, "epoch": 19.60758835758836, "percentage": 98.04, "elapsed_time": "1:28:45", "remaining_time": "0:01:46", "throughput": 1350.64, "total_tokens": 7193424}
|
| 7565 |
+
{"current_steps": 37730, "total_steps": 38480, "loss": 0.2218, "lr": 3.47950738266739e-05, "epoch": 19.61018711018711, "percentage": 98.05, "elapsed_time": "1:28:46", "remaining_time": "0:01:45", "throughput": 1350.64, "total_tokens": 7194352}
|
| 7566 |
+
{"current_steps": 37735, "total_steps": 38480, "loss": 0.2645, "lr": 3.433347583678581e-05, "epoch": 19.612785862785863, "percentage": 98.06, "elapsed_time": "1:28:47", "remaining_time": "0:01:45", "throughput": 1350.66, "total_tokens": 7195376}
|
| 7567 |
+
{"current_steps": 37740, "total_steps": 38480, "loss": 0.2976, "lr": 3.387495664113227e-05, "epoch": 19.615384615384617, "percentage": 98.08, "elapsed_time": "1:28:47", "remaining_time": "0:01:44", "throughput": 1350.67, "total_tokens": 7196336}
|
| 7568 |
+
{"current_steps": 37745, "total_steps": 38480, "loss": 0.2881, "lr": 3.3419516334044496e-05, "epoch": 19.617983367983367, "percentage": 98.09, "elapsed_time": "1:28:48", "remaining_time": "0:01:43", "throughput": 1350.69, "total_tokens": 7197360}
|
| 7569 |
+
{"current_steps": 37750, "total_steps": 38480, "loss": 0.258, "lr": 3.296715500921421e-05, "epoch": 19.62058212058212, "percentage": 98.1, "elapsed_time": "1:28:49", "remaining_time": "0:01:43", "throughput": 1350.7, "total_tokens": 7198320}
|
| 7570 |
+
{"current_steps": 37755, "total_steps": 38480, "loss": 0.2006, "lr": 3.251787275970363e-05, "epoch": 19.623180873180875, "percentage": 98.12, "elapsed_time": "1:28:49", "remaining_time": "0:01:42", "throughput": 1350.71, "total_tokens": 7199280}
|
| 7571 |
+
{"current_steps": 37760, "total_steps": 38480, "loss": 0.2157, "lr": 3.207166967794051e-05, "epoch": 19.625779625779625, "percentage": 98.13, "elapsed_time": "1:28:50", "remaining_time": "0:01:41", "throughput": 1350.72, "total_tokens": 7200240}
|
| 7572 |
+
{"current_steps": 37765, "total_steps": 38480, "loss": 0.2847, "lr": 3.162854585572139e-05, "epoch": 19.62837837837838, "percentage": 98.14, "elapsed_time": "1:28:51", "remaining_time": "0:01:40", "throughput": 1350.73, "total_tokens": 7201168}
|
| 7573 |
+
{"current_steps": 37770, "total_steps": 38480, "loss": 0.2522, "lr": 3.11885013842067e-05, "epoch": 19.63097713097713, "percentage": 98.15, "elapsed_time": "1:28:51", "remaining_time": "0:01:40", "throughput": 1350.74, "total_tokens": 7202128}
|
| 7574 |
+
{"current_steps": 37775, "total_steps": 38480, "loss": 0.2248, "lr": 3.075153635392236e-05, "epoch": 19.633575883575883, "percentage": 98.17, "elapsed_time": "1:28:52", "remaining_time": "0:01:39", "throughput": 1350.74, "total_tokens": 7203056}
|
| 7575 |
+
{"current_steps": 37780, "total_steps": 38480, "loss": 0.2263, "lr": 3.031765085476479e-05, "epoch": 19.636174636174637, "percentage": 98.18, "elapsed_time": "1:28:53", "remaining_time": "0:01:38", "throughput": 1350.75, "total_tokens": 7203984}
|
| 7576 |
+
{"current_steps": 37785, "total_steps": 38480, "loss": 0.2927, "lr": 2.9886844975992585e-05, "epoch": 19.638773388773387, "percentage": 98.19, "elapsed_time": "1:28:53", "remaining_time": "0:01:38", "throughput": 1350.75, "total_tokens": 7204880}
|
| 7577 |
+
{"current_steps": 37790, "total_steps": 38480, "loss": 0.3598, "lr": 2.9459118806234863e-05, "epoch": 19.64137214137214, "percentage": 98.21, "elapsed_time": "1:28:54", "remaining_time": "0:01:37", "throughput": 1350.76, "total_tokens": 7205840}
|
| 7578 |
+
{"current_steps": 37795, "total_steps": 38480, "loss": 0.2697, "lr": 2.903447243348289e-05, "epoch": 19.643970893970895, "percentage": 98.22, "elapsed_time": "1:28:55", "remaining_time": "0:01:36", "throughput": 1350.77, "total_tokens": 7206832}
|
| 7579 |
+
{"current_steps": 37800, "total_steps": 38480, "loss": 0.2271, "lr": 2.8612905945098463e-05, "epoch": 19.646569646569645, "percentage": 98.23, "elapsed_time": "1:28:56", "remaining_time": "0:01:35", "throughput": 1350.78, "total_tokens": 7207792}
|
| 7580 |
+
{"current_steps": 37805, "total_steps": 38480, "loss": 0.244, "lr": 2.8194419427805538e-05, "epoch": 19.6491683991684, "percentage": 98.25, "elapsed_time": "1:28:56", "remaining_time": "0:01:35", "throughput": 1350.79, "total_tokens": 7208752}
|
| 7581 |
+
{"current_steps": 37810, "total_steps": 38480, "loss": 0.2467, "lr": 2.7779012967700243e-05, "epoch": 19.651767151767153, "percentage": 98.26, "elapsed_time": "1:28:57", "remaining_time": "0:01:34", "throughput": 1350.79, "total_tokens": 7209648}
|
| 7582 |
+
{"current_steps": 37815, "total_steps": 38480, "loss": 0.2647, "lr": 2.736668665023756e-05, "epoch": 19.654365904365903, "percentage": 98.27, "elapsed_time": "1:28:58", "remaining_time": "0:01:33", "throughput": 1350.8, "total_tokens": 7210608}
|
| 7583 |
+
{"current_steps": 37820, "total_steps": 38480, "loss": 0.2768, "lr": 2.6957440560246294e-05, "epoch": 19.656964656964657, "percentage": 98.28, "elapsed_time": "1:28:58", "remaining_time": "0:01:33", "throughput": 1350.8, "total_tokens": 7211504}
|
| 7584 |
+
{"current_steps": 37825, "total_steps": 38480, "loss": 0.2204, "lr": 2.6551274781915768e-05, "epoch": 19.65956340956341, "percentage": 98.3, "elapsed_time": "1:28:59", "remaining_time": "0:01:32", "throughput": 1350.81, "total_tokens": 7212464}
|
| 7585 |
+
{"current_steps": 37830, "total_steps": 38480, "loss": 0.2791, "lr": 2.614818939880581e-05, "epoch": 19.66216216216216, "percentage": 98.31, "elapsed_time": "1:29:00", "remaining_time": "0:01:31", "throughput": 1350.81, "total_tokens": 7213360}
|
| 7586 |
+
{"current_steps": 37835, "total_steps": 38480, "loss": 0.202, "lr": 2.574818449383842e-05, "epoch": 19.664760914760915, "percentage": 98.32, "elapsed_time": "1:29:00", "remaining_time": "0:01:31", "throughput": 1350.83, "total_tokens": 7214416}
|
| 7587 |
+
{"current_steps": 37840, "total_steps": 38480, "loss": 0.3317, "lr": 2.5351260149306108e-05, "epoch": 19.66735966735967, "percentage": 98.34, "elapsed_time": "1:29:01", "remaining_time": "0:01:30", "throughput": 1350.85, "total_tokens": 7215408}
|
| 7588 |
+
{"current_steps": 37845, "total_steps": 38480, "loss": 0.2848, "lr": 2.495741644686522e-05, "epoch": 19.66995841995842, "percentage": 98.35, "elapsed_time": "1:29:02", "remaining_time": "0:01:29", "throughput": 1350.87, "total_tokens": 7216432}
|
| 7589 |
+
{"current_steps": 37850, "total_steps": 38480, "loss": 0.2743, "lr": 2.4566653467539278e-05, "epoch": 19.672557172557173, "percentage": 98.36, "elapsed_time": "1:29:02", "remaining_time": "0:01:28", "throughput": 1350.88, "total_tokens": 7217424}
|
| 7590 |
+
{"current_steps": 37855, "total_steps": 38480, "loss": 0.2148, "lr": 2.4178971291715644e-05, "epoch": 19.675155925155924, "percentage": 98.38, "elapsed_time": "1:29:03", "remaining_time": "0:01:28", "throughput": 1350.88, "total_tokens": 7218320}
|
| 7591 |
+
{"current_steps": 37860, "total_steps": 38480, "loss": 0.2168, "lr": 2.3794369999152186e-05, "epoch": 19.677754677754677, "percentage": 98.39, "elapsed_time": "1:29:04", "remaining_time": "0:01:27", "throughput": 1350.9, "total_tokens": 7219312}
|
| 7592 |
+
{"current_steps": 37865, "total_steps": 38480, "loss": 0.2113, "lr": 2.3412849668967284e-05, "epoch": 19.68035343035343, "percentage": 98.4, "elapsed_time": "1:29:04", "remaining_time": "0:01:26", "throughput": 1350.91, "total_tokens": 7220272}
|
| 7593 |
+
{"current_steps": 37870, "total_steps": 38480, "loss": 0.217, "lr": 2.3034410379653145e-05, "epoch": 19.68295218295218, "percentage": 98.41, "elapsed_time": "1:29:05", "remaining_time": "0:01:26", "throughput": 1350.92, "total_tokens": 7221264}
|
| 7594 |
+
{"current_steps": 37875, "total_steps": 38480, "loss": 0.2569, "lr": 2.265905220905917e-05, "epoch": 19.685550935550935, "percentage": 98.43, "elapsed_time": "1:29:06", "remaining_time": "0:01:25", "throughput": 1350.93, "total_tokens": 7222192}
|
| 7595 |
+
{"current_steps": 37880, "total_steps": 38480, "loss": 0.2692, "lr": 2.228677523441025e-05, "epoch": 19.68814968814969, "percentage": 98.44, "elapsed_time": "1:29:06", "remaining_time": "0:01:24", "throughput": 1350.93, "total_tokens": 7223088}
|
| 7596 |
+
{"current_steps": 37885, "total_steps": 38480, "loss": 0.2113, "lr": 2.1917579532288454e-05, "epoch": 19.69074844074844, "percentage": 98.45, "elapsed_time": "1:29:07", "remaining_time": "0:01:23", "throughput": 1350.94, "total_tokens": 7224048}
|
| 7597 |
+
{"current_steps": 37890, "total_steps": 38480, "loss": 0.2395, "lr": 2.1551465178648033e-05, "epoch": 19.693347193347194, "percentage": 98.47, "elapsed_time": "1:29:08", "remaining_time": "0:01:23", "throughput": 1350.95, "total_tokens": 7225008}
|
| 7598 |
+
{"current_steps": 37895, "total_steps": 38480, "loss": 0.212, "lr": 2.1188432248807064e-05, "epoch": 19.695945945945947, "percentage": 98.48, "elapsed_time": "1:29:08", "remaining_time": "0:01:22", "throughput": 1350.95, "total_tokens": 7225936}
|
| 7599 |
+
{"current_steps": 37900, "total_steps": 38480, "loss": 0.2157, "lr": 2.082848081745081e-05, "epoch": 19.698544698544698, "percentage": 98.49, "elapsed_time": "1:29:09", "remaining_time": "0:01:21", "throughput": 1350.96, "total_tokens": 7226896}
|
| 7600 |
+
{"current_steps": 37905, "total_steps": 38480, "loss": 0.2776, "lr": 2.0471610958628372e-05, "epoch": 19.70114345114345, "percentage": 98.51, "elapsed_time": "1:29:10", "remaining_time": "0:01:21", "throughput": 1350.97, "total_tokens": 7227824}
|
| 7601 |
+
{"current_steps": 37910, "total_steps": 38480, "loss": 0.2344, "lr": 2.0117822745757686e-05, "epoch": 19.703742203742205, "percentage": 98.52, "elapsed_time": "1:29:10", "remaining_time": "0:01:20", "throughput": 1350.96, "total_tokens": 7228688}
|
| 7602 |
+
{"current_steps": 37915, "total_steps": 38480, "loss": 0.2664, "lr": 1.9767116251620532e-05, "epoch": 19.706340956340956, "percentage": 98.53, "elapsed_time": "1:29:11", "remaining_time": "0:01:19", "throughput": 1350.97, "total_tokens": 7229648}
|
| 7603 |
+
{"current_steps": 37920, "total_steps": 38480, "loss": 0.2514, "lr": 1.941949154836753e-05, "epoch": 19.70893970893971, "percentage": 98.54, "elapsed_time": "1:29:12", "remaining_time": "0:01:19", "throughput": 1350.98, "total_tokens": 7230608}
|
| 7604 |
+
{"current_steps": 37925, "total_steps": 38480, "loss": 0.2727, "lr": 1.9074948707511473e-05, "epoch": 19.71153846153846, "percentage": 98.56, "elapsed_time": "1:29:12", "remaining_time": "0:01:18", "throughput": 1350.99, "total_tokens": 7231568}
|
| 7605 |
+
{"current_steps": 37930, "total_steps": 38480, "loss": 0.2979, "lr": 1.8733487799932336e-05, "epoch": 19.714137214137214, "percentage": 98.57, "elapsed_time": "1:29:13", "remaining_time": "0:01:17", "throughput": 1351.0, "total_tokens": 7232528}
|
| 7606 |
+
{"current_steps": 37935, "total_steps": 38480, "loss": 0.2923, "lr": 1.839510889587892e-05, "epoch": 19.716735966735968, "percentage": 98.58, "elapsed_time": "1:29:14", "remaining_time": "0:01:16", "throughput": 1351.03, "total_tokens": 7233616}
|
| 7607 |
+
{"current_steps": 37940, "total_steps": 38480, "loss": 0.2623, "lr": 1.805981206496221e-05, "epoch": 19.719334719334718, "percentage": 98.6, "elapsed_time": "1:29:14", "remaining_time": "0:01:16", "throughput": 1351.04, "total_tokens": 7234544}
|
| 7608 |
+
{"current_steps": 37945, "total_steps": 38480, "loss": 0.3519, "lr": 1.772759737616203e-05, "epoch": 19.721933471933472, "percentage": 98.61, "elapsed_time": "1:29:15", "remaining_time": "0:01:15", "throughput": 1351.03, "total_tokens": 7235408}
|
| 7609 |
+
{"current_steps": 37950, "total_steps": 38480, "loss": 0.2244, "lr": 1.739846489782204e-05, "epoch": 19.724532224532226, "percentage": 98.62, "elapsed_time": "1:29:16", "remaining_time": "0:01:14", "throughput": 1351.03, "total_tokens": 7236336}
|
| 7610 |
+
{"current_steps": 37955, "total_steps": 38480, "loss": 0.2461, "lr": 1.7072414697651417e-05, "epoch": 19.727130977130976, "percentage": 98.64, "elapsed_time": "1:29:16", "remaining_time": "0:01:14", "throughput": 1351.04, "total_tokens": 7237296}
|
| 7611 |
+
{"current_steps": 37960, "total_steps": 38480, "loss": 0.2305, "lr": 1.6749446842729832e-05, "epoch": 19.72972972972973, "percentage": 98.65, "elapsed_time": "1:29:17", "remaining_time": "0:01:13", "throughput": 1351.05, "total_tokens": 7238256}
|
| 7612 |
+
{"current_steps": 37965, "total_steps": 38480, "loss": 0.309, "lr": 1.6429561399495808e-05, "epoch": 19.732328482328484, "percentage": 98.66, "elapsed_time": "1:29:18", "remaining_time": "0:01:12", "throughput": 1351.07, "total_tokens": 7239248}
|
| 7613 |
+
{"current_steps": 37970, "total_steps": 38480, "loss": 0.2708, "lr": 1.6112758433760034e-05, "epoch": 19.734927234927234, "percentage": 98.67, "elapsed_time": "1:29:18", "remaining_time": "0:01:11", "throughput": 1351.09, "total_tokens": 7240272}
|
| 7614 |
+
{"current_steps": 37975, "total_steps": 38480, "loss": 0.2981, "lr": 1.5799038010695376e-05, "epoch": 19.737525987525988, "percentage": 98.69, "elapsed_time": "1:29:19", "remaining_time": "0:01:11", "throughput": 1351.1, "total_tokens": 7241200}
|
| 7615 |
+
{"current_steps": 37980, "total_steps": 38480, "loss": 0.2362, "lr": 1.5488400194840213e-05, "epoch": 19.74012474012474, "percentage": 98.7, "elapsed_time": "1:29:20", "remaining_time": "0:01:10", "throughput": 1351.11, "total_tokens": 7242192}
|
| 7616 |
+
{"current_steps": 37985, "total_steps": 38480, "loss": 0.2335, "lr": 1.5180845050103419e-05, "epoch": 19.742723492723492, "percentage": 98.71, "elapsed_time": "1:29:20", "remaining_time": "0:01:09", "throughput": 1351.11, "total_tokens": 7243088}
|
| 7617 |
+
{"current_steps": 37990, "total_steps": 38480, "loss": 0.2238, "lr": 1.4876372639754387e-05, "epoch": 19.745322245322246, "percentage": 98.73, "elapsed_time": "1:29:21", "remaining_time": "0:01:09", "throughput": 1351.13, "total_tokens": 7244080}
|
| 7618 |
+
{"current_steps": 37995, "total_steps": 38480, "loss": 0.2699, "lr": 1.4574983026429676e-05, "epoch": 19.747920997921, "percentage": 98.74, "elapsed_time": "1:29:22", "remaining_time": "0:01:08", "throughput": 1351.14, "total_tokens": 7245072}
|
| 7619 |
+
{"current_steps": 38000, "total_steps": 38480, "loss": 0.2095, "lr": 1.4276676272133025e-05, "epoch": 19.75051975051975, "percentage": 98.75, "elapsed_time": "1:29:22", "remaining_time": "0:01:07", "throughput": 1351.14, "total_tokens": 7245968}
|
| 7620 |
+
{"current_steps": 38005, "total_steps": 38480, "loss": 0.277, "lr": 1.3981452438235342e-05, "epoch": 19.753118503118504, "percentage": 98.77, "elapsed_time": "1:29:23", "remaining_time": "0:01:07", "throughput": 1351.15, "total_tokens": 7246960}
|
| 7621 |
+
{"current_steps": 38010, "total_steps": 38480, "loss": 0.2759, "lr": 1.3689311585468045e-05, "epoch": 19.755717255717254, "percentage": 98.78, "elapsed_time": "1:29:24", "remaining_time": "0:01:06", "throughput": 1351.16, "total_tokens": 7247888}
|
| 7622 |
+
{"current_steps": 38015, "total_steps": 38480, "loss": 0.2343, "lr": 1.340025377393139e-05, "epoch": 19.758316008316008, "percentage": 98.79, "elapsed_time": "1:29:24", "remaining_time": "0:01:05", "throughput": 1351.17, "total_tokens": 7248880}
|
| 7623 |
+
{"current_steps": 38020, "total_steps": 38480, "loss": 0.2684, "lr": 1.311427906309448e-05, "epoch": 19.760914760914762, "percentage": 98.8, "elapsed_time": "1:29:25", "remaining_time": "0:01:04", "throughput": 1351.19, "total_tokens": 7249872}
|
| 7624 |
+
{"current_steps": 38025, "total_steps": 38480, "loss": 0.249, "lr": 1.2831387511786917e-05, "epoch": 19.763513513513512, "percentage": 98.82, "elapsed_time": "1:29:26", "remaining_time": "0:01:04", "throughput": 1351.2, "total_tokens": 7250832}
|
| 7625 |
+
{"current_steps": 38030, "total_steps": 38480, "loss": 0.3006, "lr": 1.2551579178205484e-05, "epoch": 19.766112266112266, "percentage": 98.83, "elapsed_time": "1:29:26", "remaining_time": "0:01:03", "throughput": 1351.21, "total_tokens": 7251792}
|
| 7626 |
+
{"current_steps": 38035, "total_steps": 38480, "loss": 0.2617, "lr": 1.2274854119917466e-05, "epoch": 19.76871101871102, "percentage": 98.84, "elapsed_time": "1:29:27", "remaining_time": "0:01:02", "throughput": 1351.22, "total_tokens": 7252752}
|
| 7627 |
+
{"current_steps": 38040, "total_steps": 38480, "loss": 0.2277, "lr": 1.200121239384566e-05, "epoch": 19.77130977130977, "percentage": 98.86, "elapsed_time": "1:29:28", "remaining_time": "0:01:02", "throughput": 1351.22, "total_tokens": 7253680}
|
| 7628 |
+
{"current_steps": 38045, "total_steps": 38480, "loss": 0.289, "lr": 1.1730654056290035e-05, "epoch": 19.773908523908524, "percentage": 98.87, "elapsed_time": "1:29:28", "remaining_time": "0:01:01", "throughput": 1351.23, "total_tokens": 7254640}
|
| 7629 |
+
{"current_steps": 38050, "total_steps": 38480, "loss": 0.234, "lr": 1.1463179162909398e-05, "epoch": 19.776507276507278, "percentage": 98.88, "elapsed_time": "1:29:29", "remaining_time": "0:01:00", "throughput": 1351.23, "total_tokens": 7255536}
|
| 7630 |
+
{"current_steps": 38055, "total_steps": 38480, "loss": 0.2266, "lr": 1.1198787768728068e-05, "epoch": 19.77910602910603, "percentage": 98.9, "elapsed_time": "1:29:30", "remaining_time": "0:00:59", "throughput": 1351.24, "total_tokens": 7256496}
|
| 7631 |
+
{"current_steps": 38060, "total_steps": 38480, "loss": 0.2867, "lr": 1.0937479928139204e-05, "epoch": 19.781704781704782, "percentage": 98.91, "elapsed_time": "1:29:30", "remaining_time": "0:00:59", "throughput": 1351.25, "total_tokens": 7257424}
|
| 7632 |
+
{"current_steps": 38065, "total_steps": 38480, "loss": 0.269, "lr": 1.0679255694899802e-05, "epoch": 19.784303534303533, "percentage": 98.92, "elapsed_time": "1:29:31", "remaining_time": "0:00:58", "throughput": 1351.26, "total_tokens": 7258384}
|
| 7633 |
+
{"current_steps": 38070, "total_steps": 38480, "loss": 0.2452, "lr": 1.0424115122132371e-05, "epoch": 19.786902286902286, "percentage": 98.93, "elapsed_time": "1:29:32", "remaining_time": "0:00:57", "throughput": 1351.26, "total_tokens": 7259312}
|
| 7634 |
+
{"current_steps": 38075, "total_steps": 38480, "loss": 0.2423, "lr": 1.0172058262324924e-05, "epoch": 19.78950103950104, "percentage": 98.95, "elapsed_time": "1:29:32", "remaining_time": "0:00:57", "throughput": 1351.27, "total_tokens": 7260272}
|
| 7635 |
+
{"current_steps": 38080, "total_steps": 38480, "loss": 0.2205, "lr": 9.923085167332645e-06, "epoch": 19.79209979209979, "percentage": 98.96, "elapsed_time": "1:29:33", "remaining_time": "0:00:56", "throughput": 1351.28, "total_tokens": 7261232}
|
| 7636 |
+
{"current_steps": 38085, "total_steps": 38480, "loss": 0.2485, "lr": 9.677195888374568e-06, "epoch": 19.794698544698544, "percentage": 98.97, "elapsed_time": "1:29:34", "remaining_time": "0:00:55", "throughput": 1351.29, "total_tokens": 7262192}
|
| 7637 |
+
{"current_steps": 38090, "total_steps": 38480, "loss": 0.2112, "lr": 9.43439047603689e-06, "epoch": 19.7972972972973, "percentage": 98.99, "elapsed_time": "1:29:34", "remaining_time": "0:00:55", "throughput": 1351.3, "total_tokens": 7263120}
|
| 7638 |
+
{"current_steps": 38095, "total_steps": 38480, "loss": 0.2591, "lr": 9.19466898026966e-06, "epoch": 19.79989604989605, "percentage": 99.0, "elapsed_time": "1:29:35", "remaining_time": "0:00:54", "throughput": 1351.3, "total_tokens": 7264016}
|
| 7639 |
+
{"current_steps": 38100, "total_steps": 38480, "loss": 0.2628, "lr": 8.95803145039009e-06, "epoch": 19.802494802494802, "percentage": 99.01, "elapsed_time": "1:29:36", "remaining_time": "0:00:53", "throughput": 1351.32, "total_tokens": 7265040}
|
| 7640 |
+
{"current_steps": 38105, "total_steps": 38480, "loss": 0.2323, "lr": 8.724477935079244e-06, "epoch": 19.805093555093556, "percentage": 99.03, "elapsed_time": "1:29:36", "remaining_time": "0:00:52", "throughput": 1351.32, "total_tokens": 7265968}
|
| 7641 |
+
{"current_steps": 38110, "total_steps": 38480, "loss": 0.2587, "lr": 8.494008482383685e-06, "epoch": 19.807692307692307, "percentage": 99.04, "elapsed_time": "1:29:37", "remaining_time": "0:00:52", "throughput": 1351.33, "total_tokens": 7266928}
|
| 7642 |
+
{"current_steps": 38115, "total_steps": 38480, "loss": 0.2446, "lr": 8.266623139718821e-06, "epoch": 19.81029106029106, "percentage": 99.05, "elapsed_time": "1:29:38", "remaining_time": "0:00:51", "throughput": 1351.34, "total_tokens": 7267856}
|
| 7643 |
+
{"current_steps": 38120, "total_steps": 38480, "loss": 0.1633, "lr": 8.04232195386223e-06, "epoch": 19.812889812889814, "percentage": 99.06, "elapsed_time": "1:29:38", "remaining_time": "0:00:50", "throughput": 1351.34, "total_tokens": 7268784}
|
| 7644 |
+
{"current_steps": 38125, "total_steps": 38480, "loss": 0.2507, "lr": 7.821104970957004e-06, "epoch": 19.815488565488565, "percentage": 99.08, "elapsed_time": "1:29:39", "remaining_time": "0:00:50", "throughput": 1351.35, "total_tokens": 7269744}
|
| 7645 |
+
{"current_steps": 38130, "total_steps": 38480, "loss": 0.2875, "lr": 7.602972236513406e-06, "epoch": 19.81808731808732, "percentage": 99.09, "elapsed_time": "1:29:40", "remaining_time": "0:00:49", "throughput": 1351.36, "total_tokens": 7270704}
|
| 7646 |
+
{"current_steps": 38135, "total_steps": 38480, "loss": 0.2212, "lr": 7.387923795408868e-06, "epoch": 19.820686070686072, "percentage": 99.1, "elapsed_time": "1:29:40", "remaining_time": "0:00:48", "throughput": 1351.38, "total_tokens": 7271696}
|
| 7647 |
+
{"current_steps": 38140, "total_steps": 38480, "loss": 0.2723, "lr": 7.175959691879674e-06, "epoch": 19.823284823284823, "percentage": 99.12, "elapsed_time": "1:29:41", "remaining_time": "0:00:47", "throughput": 1351.39, "total_tokens": 7272656}
|
| 7648 |
+
{"current_steps": 38145, "total_steps": 38480, "loss": 0.2447, "lr": 6.967079969535938e-06, "epoch": 19.825883575883577, "percentage": 99.13, "elapsed_time": "1:29:42", "remaining_time": "0:00:47", "throughput": 1351.4, "total_tokens": 7273616}
|
| 7649 |
+
{"current_steps": 38150, "total_steps": 38480, "loss": 0.2646, "lr": 6.761284671346624e-06, "epoch": 19.828482328482327, "percentage": 99.14, "elapsed_time": "1:29:42", "remaining_time": "0:00:46", "throughput": 1351.41, "total_tokens": 7274576}
|
| 7650 |
+
{"current_steps": 38155, "total_steps": 38480, "loss": 0.2136, "lr": 6.558573839649528e-06, "epoch": 19.83108108108108, "percentage": 99.16, "elapsed_time": "1:29:43", "remaining_time": "0:00:45", "throughput": 1351.41, "total_tokens": 7275504}
|
| 7651 |
+
{"current_steps": 38160, "total_steps": 38480, "loss": 0.2124, "lr": 6.3589475161462955e-06, "epoch": 19.833679833679835, "percentage": 99.17, "elapsed_time": "1:29:44", "remaining_time": "0:00:45", "throughput": 1351.42, "total_tokens": 7276464}
|
| 7652 |
+
{"current_steps": 38165, "total_steps": 38480, "loss": 0.3055, "lr": 6.162405741905741e-06, "epoch": 19.836278586278585, "percentage": 99.18, "elapsed_time": "1:29:44", "remaining_time": "0:00:44", "throughput": 1351.43, "total_tokens": 7277424}
|
| 7653 |
+
{"current_steps": 38170, "total_steps": 38480, "loss": 0.2519, "lr": 5.968948557362186e-06, "epoch": 19.83887733887734, "percentage": 99.19, "elapsed_time": "1:29:45", "remaining_time": "0:00:43", "throughput": 1351.44, "total_tokens": 7278352}
|
| 7654 |
+
{"current_steps": 38175, "total_steps": 38480, "loss": 0.2555, "lr": 5.77857600231213e-06, "epoch": 19.841476091476093, "percentage": 99.21, "elapsed_time": "1:29:46", "remaining_time": "0:00:43", "throughput": 1351.45, "total_tokens": 7279312}
|
| 7655 |
+
{"current_steps": 38180, "total_steps": 38480, "loss": 0.2912, "lr": 5.5912881159209115e-06, "epoch": 19.844074844074843, "percentage": 99.22, "elapsed_time": "1:29:46", "remaining_time": "0:00:42", "throughput": 1351.45, "total_tokens": 7280240}
|
| 7656 |
+
{"current_steps": 38185, "total_steps": 38480, "loss": 0.2799, "lr": 5.407084936719375e-06, "epoch": 19.846673596673597, "percentage": 99.23, "elapsed_time": "1:29:47", "remaining_time": "0:00:41", "throughput": 1351.46, "total_tokens": 7281168}
|
| 7657 |
+
{"current_steps": 38190, "total_steps": 38480, "loss": 0.2662, "lr": 5.225966502598877e-06, "epoch": 19.84927234927235, "percentage": 99.25, "elapsed_time": "1:29:48", "remaining_time": "0:00:40", "throughput": 1351.45, "total_tokens": 7282064}
|
| 7658 |
+
{"current_steps": 38195, "total_steps": 38480, "loss": 0.2622, "lr": 5.04793285082461e-06, "epoch": 19.8518711018711, "percentage": 99.26, "elapsed_time": "1:29:48", "remaining_time": "0:00:40", "throughput": 1351.46, "total_tokens": 7283024}
|
| 7659 |
+
{"current_steps": 38200, "total_steps": 38480, "loss": 0.295, "lr": 4.872984018017279e-06, "epoch": 19.854469854469855, "percentage": 99.27, "elapsed_time": "1:29:49", "remaining_time": "0:00:39", "throughput": 1351.47, "total_tokens": 7283952}
|
| 7660 |
+
{"current_steps": 38205, "total_steps": 38480, "loss": 0.2957, "lr": 4.70112004017309e-06, "epoch": 19.85706860706861, "percentage": 99.29, "elapsed_time": "1:29:50", "remaining_time": "0:00:38", "throughput": 1351.49, "total_tokens": 7284944}
|
| 7661 |
+
{"current_steps": 38210, "total_steps": 38480, "loss": 0.3198, "lr": 4.5323409526437655e-06, "epoch": 19.85966735966736, "percentage": 99.3, "elapsed_time": "1:29:50", "remaining_time": "0:00:38", "throughput": 1351.49, "total_tokens": 7285872}
|
| 7662 |
+
{"current_steps": 38215, "total_steps": 38480, "loss": 0.2568, "lr": 4.36664679015486e-06, "epoch": 19.862266112266113, "percentage": 99.31, "elapsed_time": "1:29:51", "remaining_time": "0:00:37", "throughput": 1351.49, "total_tokens": 7286768}
|
| 7663 |
+
{"current_steps": 38220, "total_steps": 38480, "loss": 0.198, "lr": 4.204037586792441e-06, "epoch": 19.864864864864863, "percentage": 99.32, "elapsed_time": "1:29:52", "remaining_time": "0:00:36", "throughput": 1351.5, "total_tokens": 7287728}
|
| 7664 |
+
{"current_steps": 38225, "total_steps": 38480, "loss": 0.225, "lr": 4.044513376008085e-06, "epoch": 19.867463617463617, "percentage": 99.34, "elapsed_time": "1:29:52", "remaining_time": "0:00:35", "throughput": 1351.51, "total_tokens": 7288688}
|
| 7665 |
+
{"current_steps": 38230, "total_steps": 38480, "loss": 0.2285, "lr": 3.888074190620539e-06, "epoch": 19.87006237006237, "percentage": 99.35, "elapsed_time": "1:29:53", "remaining_time": "0:00:35", "throughput": 1351.54, "total_tokens": 7289776}
|
| 7666 |
+
{"current_steps": 38235, "total_steps": 38480, "loss": 0.2544, "lr": 3.734720062812391e-06, "epoch": 19.87266112266112, "percentage": 99.36, "elapsed_time": "1:29:54", "remaining_time": "0:00:34", "throughput": 1351.57, "total_tokens": 7290896}
|
| 7667 |
+
{"current_steps": 38240, "total_steps": 38480, "loss": 0.3097, "lr": 3.584451024133406e-06, "epoch": 19.875259875259875, "percentage": 99.38, "elapsed_time": "1:29:55", "remaining_time": "0:00:33", "throughput": 1351.58, "total_tokens": 7291856}
|
| 7668 |
+
{"current_steps": 38245, "total_steps": 38480, "loss": 0.227, "lr": 3.437267105497188e-06, "epoch": 19.87785862785863, "percentage": 99.39, "elapsed_time": "1:29:55", "remaining_time": "0:00:33", "throughput": 1351.59, "total_tokens": 7292784}
|
| 7669 |
+
{"current_steps": 38250, "total_steps": 38480, "loss": 0.2546, "lr": 3.29316833718285e-06, "epoch": 19.88045738045738, "percentage": 99.4, "elapsed_time": "1:29:56", "remaining_time": "0:00:32", "throughput": 1351.6, "total_tokens": 7293744}
|
| 7670 |
+
{"current_steps": 38255, "total_steps": 38480, "loss": 0.272, "lr": 3.152154748835012e-06, "epoch": 19.883056133056133, "percentage": 99.42, "elapsed_time": "1:29:57", "remaining_time": "0:00:31", "throughput": 1351.61, "total_tokens": 7294704}
|
| 7671 |
+
{"current_steps": 38260, "total_steps": 38480, "loss": 0.2135, "lr": 3.0142263694638015e-06, "epoch": 19.885654885654887, "percentage": 99.43, "elapsed_time": "1:29:57", "remaining_time": "0:00:31", "throughput": 1351.61, "total_tokens": 7295600}
|
| 7672 |
+
{"current_steps": 38265, "total_steps": 38480, "loss": 0.1982, "lr": 2.8793832274431885e-06, "epoch": 19.888253638253637, "percentage": 99.44, "elapsed_time": "1:29:58", "remaining_time": "0:00:30", "throughput": 1351.61, "total_tokens": 7296528}
|
| 7673 |
+
{"current_steps": 38270, "total_steps": 38480, "loss": 0.3234, "lr": 2.7476253505143155e-06, "epoch": 19.89085239085239, "percentage": 99.45, "elapsed_time": "1:29:59", "remaining_time": "0:00:29", "throughput": 1351.62, "total_tokens": 7297520}
|
| 7674 |
+
{"current_steps": 38275, "total_steps": 38480, "loss": 0.2529, "lr": 2.6189527657821675e-06, "epoch": 19.893451143451145, "percentage": 99.47, "elapsed_time": "1:29:59", "remaining_time": "0:00:28", "throughput": 1351.63, "total_tokens": 7298480}
|
| 7675 |
+
{"current_steps": 38280, "total_steps": 38480, "loss": 0.2361, "lr": 2.493365499720568e-06, "epoch": 19.896049896049895, "percentage": 99.48, "elapsed_time": "1:30:00", "remaining_time": "0:00:28", "throughput": 1351.65, "total_tokens": 7299472}
|
| 7676 |
+
{"current_steps": 38285, "total_steps": 38480, "loss": 0.2956, "lr": 2.3708635781621855e-06, "epoch": 19.89864864864865, "percentage": 99.49, "elapsed_time": "1:30:01", "remaining_time": "0:00:27", "throughput": 1351.66, "total_tokens": 7300464}
|
| 7677 |
+
{"current_steps": 38290, "total_steps": 38480, "loss": 0.2655, "lr": 2.2514470263101935e-06, "epoch": 19.901247401247403, "percentage": 99.51, "elapsed_time": "1:30:01", "remaining_time": "0:00:26", "throughput": 1351.67, "total_tokens": 7301392}
|
| 7678 |
+
{"current_steps": 38295, "total_steps": 38480, "loss": 0.2511, "lr": 2.1351158687332727e-06, "epoch": 19.903846153846153, "percentage": 99.52, "elapsed_time": "1:30:02", "remaining_time": "0:00:26", "throughput": 1351.67, "total_tokens": 7302320}
|
| 7679 |
+
{"current_steps": 38300, "total_steps": 38480, "loss": 0.2671, "lr": 2.0218701293606146e-06, "epoch": 19.906444906444907, "percentage": 99.53, "elapsed_time": "1:30:03", "remaining_time": "0:00:25", "throughput": 1351.69, "total_tokens": 7303344}
|
| 7680 |
+
{"current_steps": 38305, "total_steps": 38480, "loss": 0.2385, "lr": 1.911709831490249e-06, "epoch": 19.909043659043657, "percentage": 99.55, "elapsed_time": "1:30:03", "remaining_time": "0:00:24", "throughput": 1351.7, "total_tokens": 7304272}
|
| 7681 |
+
{"current_steps": 38310, "total_steps": 38480, "loss": 0.2469, "lr": 1.8046349977857146e-06, "epoch": 19.91164241164241, "percentage": 99.56, "elapsed_time": "1:30:04", "remaining_time": "0:00:23", "throughput": 1351.71, "total_tokens": 7305232}
|
| 7682 |
+
{"current_steps": 38315, "total_steps": 38480, "loss": 0.2735, "lr": 1.7006456502727252e-06, "epoch": 19.914241164241165, "percentage": 99.57, "elapsed_time": "1:30:05", "remaining_time": "0:00:23", "throughput": 1351.71, "total_tokens": 7306160}
|
| 7683 |
+
{"current_steps": 38320, "total_steps": 38480, "loss": 0.2825, "lr": 1.599741810347499e-06, "epoch": 19.916839916839916, "percentage": 99.58, "elapsed_time": "1:30:05", "remaining_time": "0:00:22", "throughput": 1351.73, "total_tokens": 7307152}
|
| 7684 |
+
{"current_steps": 38325, "total_steps": 38480, "loss": 0.2463, "lr": 1.501923498766766e-06, "epoch": 19.91943866943867, "percentage": 99.6, "elapsed_time": "1:30:06", "remaining_time": "0:00:21", "throughput": 1351.74, "total_tokens": 7308144}
|
| 7685 |
+
{"current_steps": 38330, "total_steps": 38480, "loss": 0.2883, "lr": 1.4071907356544289e-06, "epoch": 19.922037422037423, "percentage": 99.61, "elapsed_time": "1:30:07", "remaining_time": "0:00:21", "throughput": 1351.75, "total_tokens": 7309104}
|
| 7686 |
+
{"current_steps": 38335, "total_steps": 38480, "loss": 0.2434, "lr": 1.3155435404982319e-06, "epoch": 19.924636174636174, "percentage": 99.62, "elapsed_time": "1:30:07", "remaining_time": "0:00:20", "throughput": 1351.76, "total_tokens": 7310032}
|
| 7687 |
+
{"current_steps": 38340, "total_steps": 38480, "loss": 0.2416, "lr": 1.2269819321530927e-06, "epoch": 19.927234927234927, "percentage": 99.64, "elapsed_time": "1:30:08", "remaining_time": "0:00:19", "throughput": 1351.77, "total_tokens": 7311056}
|
| 7688 |
+
{"current_steps": 38345, "total_steps": 38480, "loss": 0.2765, "lr": 1.1415059288377716e-06, "epoch": 19.92983367983368, "percentage": 99.65, "elapsed_time": "1:30:09", "remaining_time": "0:00:19", "throughput": 1351.8, "total_tokens": 7312144}
|
| 7689 |
+
{"current_steps": 38350, "total_steps": 38480, "loss": 0.271, "lr": 1.0591155481382009e-06, "epoch": 19.93243243243243, "percentage": 99.66, "elapsed_time": "1:30:09", "remaining_time": "0:00:18", "throughput": 1351.82, "total_tokens": 7313136}
|
| 7690 |
+
{"current_steps": 38355, "total_steps": 38480, "loss": 0.2958, "lr": 9.798108070024901e-07, "epoch": 19.935031185031185, "percentage": 99.68, "elapsed_time": "1:30:10", "remaining_time": "0:00:17", "throughput": 1351.83, "total_tokens": 7314096}
|
| 7691 |
+
{"current_steps": 38360, "total_steps": 38480, "loss": 0.2885, "lr": 9.035917217442568e-07, "epoch": 19.93762993762994, "percentage": 99.69, "elapsed_time": "1:30:11", "remaining_time": "0:00:16", "throughput": 1351.84, "total_tokens": 7315088}
|
| 7692 |
+
{"current_steps": 38365, "total_steps": 38480, "loss": 0.2668, "lr": 8.304583080476213e-07, "epoch": 19.94022869022869, "percentage": 99.7, "elapsed_time": "1:30:11", "remaining_time": "0:00:16", "throughput": 1351.85, "total_tokens": 7316048}
|
| 7693 |
+
{"current_steps": 38370, "total_steps": 38480, "loss": 0.2484, "lr": 7.604105809538852e-07, "epoch": 19.942827442827443, "percentage": 99.71, "elapsed_time": "1:30:12", "remaining_time": "0:00:15", "throughput": 1351.87, "total_tokens": 7317072}
|
| 7694 |
+
{"current_steps": 38375, "total_steps": 38480, "loss": 0.2408, "lr": 6.934485548748537e-07, "epoch": 19.945426195426194, "percentage": 99.73, "elapsed_time": "1:30:13", "remaining_time": "0:00:14", "throughput": 1351.88, "total_tokens": 7318032}
|
| 7695 |
+
{"current_steps": 38380, "total_steps": 38480, "loss": 0.2317, "lr": 6.295722435878393e-07, "epoch": 19.948024948024948, "percentage": 99.74, "elapsed_time": "1:30:13", "remaining_time": "0:00:14", "throughput": 1351.9, "total_tokens": 7319024}
|
| 7696 |
+
{"current_steps": 38385, "total_steps": 38480, "loss": 0.2567, "lr": 5.687816602323314e-07, "epoch": 19.9506237006237, "percentage": 99.75, "elapsed_time": "1:30:14", "remaining_time": "0:00:13", "throughput": 1351.9, "total_tokens": 7319952}
|
| 7697 |
+
{"current_steps": 38390, "total_steps": 38480, "loss": 0.2696, "lr": 5.110768173133273e-07, "epoch": 19.953222453222452, "percentage": 99.77, "elapsed_time": "1:30:15", "remaining_time": "0:00:12", "throughput": 1351.91, "total_tokens": 7320912}
|
| 7698 |
+
{"current_steps": 38395, "total_steps": 38480, "loss": 0.2517, "lr": 4.5645772670299675e-07, "epoch": 19.955821205821206, "percentage": 99.78, "elapsed_time": "1:30:15", "remaining_time": "0:00:11", "throughput": 1351.91, "total_tokens": 7321776}
|
| 7699 |
+
{"current_steps": 38400, "total_steps": 38480, "loss": 0.2822, "lr": 4.0492439963901724e-07, "epoch": 19.95841995841996, "percentage": 99.79, "elapsed_time": "1:30:16", "remaining_time": "0:00:11", "throughput": 1351.92, "total_tokens": 7322736}
|
| 7700 |
+
{"current_steps": 38405, "total_steps": 38480, "loss": 0.2786, "lr": 3.564768467212431e-07, "epoch": 19.96101871101871, "percentage": 99.81, "elapsed_time": "1:30:17", "remaining_time": "0:00:10", "throughput": 1351.93, "total_tokens": 7323696}
|
| 7701 |
+
{"current_steps": 38410, "total_steps": 38480, "loss": 0.2675, "lr": 3.1111507791836686e-07, "epoch": 19.963617463617464, "percentage": 99.82, "elapsed_time": "1:30:17", "remaining_time": "0:00:09", "throughput": 1351.93, "total_tokens": 7324624}
|
| 7702 |
+
{"current_steps": 38415, "total_steps": 38480, "loss": 0.2856, "lr": 2.688391025595926e-07, "epoch": 19.966216216216218, "percentage": 99.83, "elapsed_time": "1:30:18", "remaining_time": "0:00:09", "throughput": 1351.93, "total_tokens": 7325552}
|
| 7703 |
+
{"current_steps": 38420, "total_steps": 38480, "loss": 0.196, "lr": 2.2964892934462798e-07, "epoch": 19.968814968814968, "percentage": 99.84, "elapsed_time": "1:30:19", "remaining_time": "0:00:08", "throughput": 1351.93, "total_tokens": 7326448}
|
| 7704 |
+
{"current_steps": 38425, "total_steps": 38480, "loss": 0.2665, "lr": 1.9354456633535743e-07, "epoch": 19.97141372141372, "percentage": 99.86, "elapsed_time": "1:30:19", "remaining_time": "0:00:07", "throughput": 1351.95, "total_tokens": 7327440}
|
| 7705 |
+
{"current_steps": 38430, "total_steps": 38480, "loss": 0.265, "lr": 1.60526020959173e-07, "epoch": 19.974012474012476, "percentage": 99.87, "elapsed_time": "1:30:20", "remaining_time": "0:00:07", "throughput": 1351.94, "total_tokens": 7328304}
|
| 7706 |
+
{"current_steps": 38435, "total_steps": 38480, "loss": 0.2307, "lr": 1.3059330000730895e-07, "epoch": 19.976611226611226, "percentage": 99.88, "elapsed_time": "1:30:21", "remaining_time": "0:00:06", "throughput": 1351.96, "total_tokens": 7329296}
|
| 7707 |
+
{"current_steps": 38440, "total_steps": 38480, "loss": 0.287, "lr": 1.0374640963983771e-07, "epoch": 19.97920997920998, "percentage": 99.9, "elapsed_time": "1:30:21", "remaining_time": "0:00:05", "throughput": 1351.98, "total_tokens": 7330320}
|
| 7708 |
+
{"current_steps": 38445, "total_steps": 38480, "loss": 0.277, "lr": 7.998535537900864e-08, "epoch": 19.981808731808734, "percentage": 99.91, "elapsed_time": "1:30:22", "remaining_time": "0:00:04", "throughput": 1351.99, "total_tokens": 7331280}
|
| 7709 |
+
{"current_steps": 38450, "total_steps": 38480, "loss": 0.258, "lr": 5.931014211091323e-08, "epoch": 19.984407484407484, "percentage": 99.92, "elapsed_time": "1:30:23", "remaining_time": "0:00:04", "throughput": 1352.0, "total_tokens": 7332272}
|
| 7710 |
+
{"current_steps": 38455, "total_steps": 38480, "loss": 0.2851, "lr": 4.1720774092146584e-08, "epoch": 19.987006237006238, "percentage": 99.94, "elapsed_time": "1:30:23", "remaining_time": "0:00:03", "throughput": 1352.01, "total_tokens": 7333232}
|
| 7711 |
+
{"current_steps": 38460, "total_steps": 38480, "loss": 0.2741, "lr": 2.7217254941480638e-08, "epoch": 19.989604989604988, "percentage": 99.95, "elapsed_time": "1:30:24", "remaining_time": "0:00:02", "throughput": 1352.02, "total_tokens": 7334192}
|
| 7712 |
+
{"current_steps": 38465, "total_steps": 38480, "loss": 0.212, "lr": 1.5799587638198886e-08, "epoch": 19.992203742203742, "percentage": 99.96, "elapsed_time": "1:30:25", "remaining_time": "0:00:02", "throughput": 1352.02, "total_tokens": 7335120}
|
| 7713 |
+
{"current_steps": 38470, "total_steps": 38480, "loss": 0.2857, "lr": 7.46777453708436e-09, "epoch": 19.994802494802496, "percentage": 99.97, "elapsed_time": "1:30:25", "remaining_time": "0:00:01", "throughput": 1352.04, "total_tokens": 7336144}
|
| 7714 |
+
{"current_steps": 38475, "total_steps": 38480, "loss": 0.2314, "lr": 2.221817346770294e-09, "epoch": 19.997401247401246, "percentage": 99.99, "elapsed_time": "1:30:26", "remaining_time": "0:00:00", "throughput": 1352.04, "total_tokens": 7337040}
|
| 7715 |
+
{"current_steps": 38480, "total_steps": 38480, "loss": 0.2678, "lr": 6.171714972413867e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:30:27", "remaining_time": "0:00:00", "throughput": 1352.03, "total_tokens": 7337920}
|
| 7716 |
+
{"current_steps": 38480, "total_steps": 38480, "eval_loss": 0.24908336997032166, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:30:40", "remaining_time": "0:00:00", "throughput": 1348.84, "total_tokens": 7337920}
|
| 7717 |
+
{"current_steps": 38480, "total_steps": 38480, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:30:41", "remaining_time": "0:00:00", "throughput": 1348.59, "total_tokens": 7337920}
|