Training in progress, step 22320
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +217 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66dc4efa0a01f3a5995b26a32a4913fb9d851c0fc74fb423295d6252e8f8a3d3
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -4266,3 +4266,220 @@
|
|
| 4266 |
{"current_steps": 21235, "total_steps": 22320, "loss": 0.2853, "lr": 3.597093990697159e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:53:42", "remaining_time": "0:02:44", "throughput": 2507.18, "total_tokens": 8078256}
|
| 4267 |
{"current_steps": 21240, "total_steps": 22320, "loss": 0.0988, "lr": 3.5641265732266895e-07, "epoch": 19.032258064516128, "percentage": 95.16, "elapsed_time": "0:53:42", "remaining_time": "0:02:43", "throughput": 2507.13, "total_tokens": 8080048}
|
| 4268 |
{"current_steps": 21245, "total_steps": 22320, "loss": 0.2938, "lr": 3.531309840838798e-07, "epoch": 19.03673835125448, "percentage": 95.18, "elapsed_time": "0:53:43", "remaining_time": "0:02:43", "throughput": 2507.14, "total_tokens": 8081904}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4266 |
{"current_steps": 21235, "total_steps": 22320, "loss": 0.2853, "lr": 3.597093990697159e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:53:42", "remaining_time": "0:02:44", "throughput": 2507.18, "total_tokens": 8078256}
|
| 4267 |
{"current_steps": 21240, "total_steps": 22320, "loss": 0.0988, "lr": 3.5641265732266895e-07, "epoch": 19.032258064516128, "percentage": 95.16, "elapsed_time": "0:53:42", "remaining_time": "0:02:43", "throughput": 2507.13, "total_tokens": 8080048}
|
| 4268 |
{"current_steps": 21245, "total_steps": 22320, "loss": 0.2938, "lr": 3.531309840838798e-07, "epoch": 19.03673835125448, "percentage": 95.18, "elapsed_time": "0:53:43", "remaining_time": "0:02:43", "throughput": 2507.14, "total_tokens": 8081904}
|
| 4269 |
+
{"current_steps": 21250, "total_steps": 22320, "loss": 0.1307, "lr": 3.498643813599517e-07, "epoch": 19.04121863799283, "percentage": 95.21, "elapsed_time": "0:53:44", "remaining_time": "0:02:42", "throughput": 2507.17, "total_tokens": 8083920}
|
| 4270 |
+
{"current_steps": 21255, "total_steps": 22320, "loss": 0.1476, "lr": 3.466128511482758e-07, "epoch": 19.045698924731184, "percentage": 95.23, "elapsed_time": "0:53:45", "remaining_time": "0:02:41", "throughput": 2507.17, "total_tokens": 8085648}
|
| 4271 |
+
{"current_steps": 21260, "total_steps": 22320, "loss": 0.0482, "lr": 3.4337639543702283e-07, "epoch": 19.050179211469533, "percentage": 95.25, "elapsed_time": "0:53:45", "remaining_time": "0:02:40", "throughput": 2507.19, "total_tokens": 8087600}
|
| 4272 |
+
{"current_steps": 21265, "total_steps": 22320, "loss": 0.2051, "lr": 3.401550162051459e-07, "epoch": 19.054659498207887, "percentage": 95.27, "elapsed_time": "0:53:46", "remaining_time": "0:02:40", "throughput": 2507.2, "total_tokens": 8089360}
|
| 4273 |
+
{"current_steps": 21270, "total_steps": 22320, "loss": 0.0605, "lr": 3.3694871542238606e-07, "epoch": 19.059139784946236, "percentage": 95.3, "elapsed_time": "0:53:47", "remaining_time": "0:02:39", "throughput": 2507.22, "total_tokens": 8091216}
|
| 4274 |
+
{"current_steps": 21275, "total_steps": 22320, "loss": 0.2373, "lr": 3.3375749504925837e-07, "epoch": 19.06362007168459, "percentage": 95.32, "elapsed_time": "0:53:47", "remaining_time": "0:02:38", "throughput": 2507.21, "total_tokens": 8093008}
|
| 4275 |
+
{"current_steps": 21280, "total_steps": 22320, "loss": 0.2012, "lr": 3.3058135703706027e-07, "epoch": 19.06810035842294, "percentage": 95.34, "elapsed_time": "0:53:48", "remaining_time": "0:02:37", "throughput": 2507.22, "total_tokens": 8094832}
|
| 4276 |
+
{"current_steps": 21285, "total_steps": 22320, "loss": 0.113, "lr": 3.2742030332786056e-07, "epoch": 19.072580645161292, "percentage": 95.36, "elapsed_time": "0:53:49", "remaining_time": "0:02:37", "throughput": 2507.22, "total_tokens": 8096560}
|
| 4277 |
+
{"current_steps": 21290, "total_steps": 22320, "loss": 0.1246, "lr": 3.242743358545131e-07, "epoch": 19.07706093189964, "percentage": 95.39, "elapsed_time": "0:53:50", "remaining_time": "0:02:36", "throughput": 2507.23, "total_tokens": 8098416}
|
| 4278 |
+
{"current_steps": 21295, "total_steps": 22320, "loss": 0.1182, "lr": 3.211434565406457e-07, "epoch": 19.081541218637994, "percentage": 95.41, "elapsed_time": "0:53:50", "remaining_time": "0:02:35", "throughput": 2507.23, "total_tokens": 8100304}
|
| 4279 |
+
{"current_steps": 21300, "total_steps": 22320, "loss": 0.0825, "lr": 3.1802766730065493e-07, "epoch": 19.086021505376344, "percentage": 95.43, "elapsed_time": "0:53:51", "remaining_time": "0:02:34", "throughput": 2507.26, "total_tokens": 8102288}
|
| 4280 |
+
{"current_steps": 21305, "total_steps": 22320, "loss": 0.1134, "lr": 3.1492697003971673e-07, "epoch": 19.090501792114694, "percentage": 95.45, "elapsed_time": "0:53:52", "remaining_time": "0:02:33", "throughput": 2507.29, "total_tokens": 8104112}
|
| 4281 |
+
{"current_steps": 21310, "total_steps": 22320, "loss": 0.069, "lr": 3.1184136665377004e-07, "epoch": 19.094982078853047, "percentage": 95.47, "elapsed_time": "0:53:52", "remaining_time": "0:02:33", "throughput": 2507.32, "total_tokens": 8106032}
|
| 4282 |
+
{"current_steps": 21315, "total_steps": 22320, "loss": 0.0716, "lr": 3.087708590295363e-07, "epoch": 19.099462365591396, "percentage": 95.5, "elapsed_time": "0:53:53", "remaining_time": "0:02:32", "throughput": 2507.34, "total_tokens": 8107888}
|
| 4283 |
+
{"current_steps": 21320, "total_steps": 22320, "loss": 0.0468, "lr": 3.057154490444969e-07, "epoch": 19.10394265232975, "percentage": 95.52, "elapsed_time": "0:53:54", "remaining_time": "0:02:31", "throughput": 2507.38, "total_tokens": 8109904}
|
| 4284 |
+
{"current_steps": 21325, "total_steps": 22320, "loss": 0.1384, "lr": 3.026751385669102e-07, "epoch": 19.1084229390681, "percentage": 95.54, "elapsed_time": "0:53:55", "remaining_time": "0:02:30", "throughput": 2507.43, "total_tokens": 8111952}
|
| 4285 |
+
{"current_steps": 21330, "total_steps": 22320, "loss": 0.1695, "lr": 2.9964992945579475e-07, "epoch": 19.112903225806452, "percentage": 95.56, "elapsed_time": "0:53:55", "remaining_time": "0:02:30", "throughput": 2507.44, "total_tokens": 8113808}
|
| 4286 |
+
{"current_steps": 21335, "total_steps": 22320, "loss": 0.1129, "lr": 2.966398235609319e-07, "epoch": 19.1173835125448, "percentage": 95.59, "elapsed_time": "0:53:56", "remaining_time": "0:02:29", "throughput": 2507.5, "total_tokens": 8115984}
|
| 4287 |
+
{"current_steps": 21340, "total_steps": 22320, "loss": 0.2814, "lr": 2.9364482272288274e-07, "epoch": 19.121863799283155, "percentage": 95.61, "elapsed_time": "0:53:57", "remaining_time": "0:02:28", "throughput": 2507.54, "total_tokens": 8117904}
|
| 4288 |
+
{"current_steps": 21345, "total_steps": 22320, "loss": 0.1146, "lr": 2.906649287729574e-07, "epoch": 19.126344086021504, "percentage": 95.63, "elapsed_time": "0:53:58", "remaining_time": "0:02:27", "throughput": 2507.56, "total_tokens": 8119792}
|
| 4289 |
+
{"current_steps": 21350, "total_steps": 22320, "loss": 0.2306, "lr": 2.8770014353323716e-07, "epoch": 19.130824372759857, "percentage": 95.65, "elapsed_time": "0:53:58", "remaining_time": "0:02:27", "throughput": 2507.61, "total_tokens": 8121936}
|
| 4290 |
+
{"current_steps": 21355, "total_steps": 22320, "loss": 0.1483, "lr": 2.847504688165609e-07, "epoch": 19.135304659498207, "percentage": 95.68, "elapsed_time": "0:53:59", "remaining_time": "0:02:26", "throughput": 2507.66, "total_tokens": 8124048}
|
| 4291 |
+
{"current_steps": 21360, "total_steps": 22320, "loss": 0.2019, "lr": 2.81815906426533e-07, "epoch": 19.13978494623656, "percentage": 95.7, "elapsed_time": "0:54:00", "remaining_time": "0:02:25", "throughput": 2507.65, "total_tokens": 8125904}
|
| 4292 |
+
{"current_steps": 21365, "total_steps": 22320, "loss": 0.051, "lr": 2.788964581575071e-07, "epoch": 19.14426523297491, "percentage": 95.72, "elapsed_time": "0:54:01", "remaining_time": "0:02:24", "throughput": 2507.7, "total_tokens": 8127952}
|
| 4293 |
+
{"current_steps": 21370, "total_steps": 22320, "loss": 0.2839, "lr": 2.7599212579460796e-07, "epoch": 19.148745519713263, "percentage": 95.74, "elapsed_time": "0:54:01", "remaining_time": "0:02:24", "throughput": 2507.72, "total_tokens": 8129808}
|
| 4294 |
+
{"current_steps": 21375, "total_steps": 22320, "loss": 0.1432, "lr": 2.7310291111370957e-07, "epoch": 19.153225806451612, "percentage": 95.77, "elapsed_time": "0:54:02", "remaining_time": "0:02:23", "throughput": 2507.73, "total_tokens": 8131664}
|
| 4295 |
+
{"current_steps": 21380, "total_steps": 22320, "loss": 0.0073, "lr": 2.702288158814459e-07, "epoch": 19.157706093189965, "percentage": 95.79, "elapsed_time": "0:54:03", "remaining_time": "0:02:22", "throughput": 2507.8, "total_tokens": 8133872}
|
| 4296 |
+
{"current_steps": 21385, "total_steps": 22320, "loss": 0.1317, "lr": 2.6736984185520284e-07, "epoch": 19.162186379928315, "percentage": 95.81, "elapsed_time": "0:54:04", "remaining_time": "0:02:21", "throughput": 2507.84, "total_tokens": 8135728}
|
| 4297 |
+
{"current_steps": 21390, "total_steps": 22320, "loss": 0.0217, "lr": 2.64525990783121e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:54:04", "remaining_time": "0:02:21", "throughput": 2507.84, "total_tokens": 8137616}
|
| 4298 |
+
{"current_steps": 21395, "total_steps": 22320, "loss": 0.0793, "lr": 2.616972644040927e-07, "epoch": 19.171146953405017, "percentage": 95.86, "elapsed_time": "0:54:05", "remaining_time": "0:02:20", "throughput": 2507.86, "total_tokens": 8139600}
|
| 4299 |
+
{"current_steps": 21400, "total_steps": 22320, "loss": 0.061, "lr": 2.5888366444776766e-07, "epoch": 19.17562724014337, "percentage": 95.88, "elapsed_time": "0:54:06", "remaining_time": "0:02:19", "throughput": 2507.86, "total_tokens": 8141424}
|
| 4300 |
+
{"current_steps": 21405, "total_steps": 22320, "loss": 0.1463, "lr": 2.56085192634542e-07, "epoch": 19.18010752688172, "percentage": 95.9, "elapsed_time": "0:54:07", "remaining_time": "0:02:18", "throughput": 2507.9, "total_tokens": 8143344}
|
| 4301 |
+
{"current_steps": 21410, "total_steps": 22320, "loss": 0.1098, "lr": 2.5330185067556633e-07, "epoch": 19.184587813620073, "percentage": 95.92, "elapsed_time": "0:54:07", "remaining_time": "0:02:18", "throughput": 2507.93, "total_tokens": 8145392}
|
| 4302 |
+
{"current_steps": 21415, "total_steps": 22320, "loss": 0.1295, "lr": 2.505336402727293e-07, "epoch": 19.189068100358423, "percentage": 95.95, "elapsed_time": "0:54:08", "remaining_time": "0:02:17", "throughput": 2507.95, "total_tokens": 8147184}
|
| 4303 |
+
{"current_steps": 21420, "total_steps": 22320, "loss": 0.0373, "lr": 2.477805631186741e-07, "epoch": 19.193548387096776, "percentage": 95.97, "elapsed_time": "0:54:09", "remaining_time": "0:02:16", "throughput": 2507.98, "total_tokens": 8149136}
|
| 4304 |
+
{"current_steps": 21425, "total_steps": 22320, "loss": 0.0361, "lr": 2.4504262089679873e-07, "epoch": 19.198028673835125, "percentage": 95.99, "elapsed_time": "0:54:10", "remaining_time": "0:02:15", "throughput": 2507.98, "total_tokens": 8151024}
|
| 4305 |
+
{"current_steps": 21430, "total_steps": 22320, "loss": 0.1043, "lr": 2.423198152812306e-07, "epoch": 19.20250896057348, "percentage": 96.01, "elapsed_time": "0:54:10", "remaining_time": "0:02:15", "throughput": 2508.01, "total_tokens": 8152848}
|
| 4306 |
+
{"current_steps": 21435, "total_steps": 22320, "loss": 0.1521, "lr": 2.396121479368546e-07, "epoch": 19.206989247311828, "percentage": 96.03, "elapsed_time": "0:54:11", "remaining_time": "0:02:14", "throughput": 2508.03, "total_tokens": 8154768}
|
| 4307 |
+
{"current_steps": 21440, "total_steps": 22320, "loss": 0.178, "lr": 2.3691962051929362e-07, "epoch": 19.211469534050178, "percentage": 96.06, "elapsed_time": "0:54:12", "remaining_time": "0:02:13", "throughput": 2508.06, "total_tokens": 8156752}
|
| 4308 |
+
{"current_steps": 21445, "total_steps": 22320, "loss": 0.0562, "lr": 2.3424223467491124e-07, "epoch": 19.21594982078853, "percentage": 96.08, "elapsed_time": "0:54:13", "remaining_time": "0:02:12", "throughput": 2508.11, "total_tokens": 8158928}
|
| 4309 |
+
{"current_steps": 21450, "total_steps": 22320, "loss": 0.0081, "lr": 2.3157999204081458e-07, "epoch": 19.22043010752688, "percentage": 96.1, "elapsed_time": "0:54:13", "remaining_time": "0:02:11", "throughput": 2508.12, "total_tokens": 8160752}
|
| 4310 |
+
{"current_steps": 21455, "total_steps": 22320, "loss": 0.0685, "lr": 2.289328942448571e-07, "epoch": 19.224910394265233, "percentage": 96.12, "elapsed_time": "0:54:14", "remaining_time": "0:02:11", "throughput": 2508.15, "total_tokens": 8162896}
|
| 4311 |
+
{"current_steps": 21460, "total_steps": 22320, "loss": 0.1462, "lr": 2.263009429056273e-07, "epoch": 19.229390681003583, "percentage": 96.15, "elapsed_time": "0:54:15", "remaining_time": "0:02:10", "throughput": 2508.15, "total_tokens": 8164592}
|
| 4312 |
+
{"current_steps": 21465, "total_steps": 22320, "loss": 0.1059, "lr": 2.236841396324435e-07, "epoch": 19.233870967741936, "percentage": 96.17, "elapsed_time": "0:54:15", "remaining_time": "0:02:09", "throughput": 2508.19, "total_tokens": 8166544}
|
| 4313 |
+
{"current_steps": 21470, "total_steps": 22320, "loss": 0.2429, "lr": 2.2108248602537852e-07, "epoch": 19.238351254480285, "percentage": 96.19, "elapsed_time": "0:54:16", "remaining_time": "0:02:08", "throughput": 2508.22, "total_tokens": 8168496}
|
| 4314 |
+
{"current_steps": 21475, "total_steps": 22320, "loss": 0.0612, "lr": 2.1849598367522927e-07, "epoch": 19.24283154121864, "percentage": 96.21, "elapsed_time": "0:54:17", "remaining_time": "0:02:08", "throughput": 2508.25, "total_tokens": 8170384}
|
| 4315 |
+
{"current_steps": 21480, "total_steps": 22320, "loss": 0.2851, "lr": 2.1592463416353347e-07, "epoch": 19.247311827956988, "percentage": 96.24, "elapsed_time": "0:54:18", "remaining_time": "0:02:07", "throughput": 2508.28, "total_tokens": 8172368}
|
| 4316 |
+
{"current_steps": 21485, "total_steps": 22320, "loss": 0.1414, "lr": 2.133684390625612e-07, "epoch": 19.25179211469534, "percentage": 96.26, "elapsed_time": "0:54:18", "remaining_time": "0:02:06", "throughput": 2508.29, "total_tokens": 8174288}
|
| 4317 |
+
{"current_steps": 21490, "total_steps": 22320, "loss": 0.1324, "lr": 2.1082739993531774e-07, "epoch": 19.25627240143369, "percentage": 96.28, "elapsed_time": "0:54:19", "remaining_time": "0:02:05", "throughput": 2508.31, "total_tokens": 8176240}
|
| 4318 |
+
{"current_steps": 21495, "total_steps": 22320, "loss": 0.1868, "lr": 2.0830151833554078e-07, "epoch": 19.260752688172044, "percentage": 96.3, "elapsed_time": "0:54:20", "remaining_time": "0:02:05", "throughput": 2508.34, "total_tokens": 8178128}
|
| 4319 |
+
{"current_steps": 21500, "total_steps": 22320, "loss": 0.3008, "lr": 2.0579079580770043e-07, "epoch": 19.265232974910393, "percentage": 96.33, "elapsed_time": "0:54:21", "remaining_time": "0:02:04", "throughput": 2508.36, "total_tokens": 8179984}
|
| 4320 |
+
{"current_steps": 21505, "total_steps": 22320, "loss": 0.3075, "lr": 2.0329523388699368e-07, "epoch": 19.269713261648747, "percentage": 96.35, "elapsed_time": "0:54:21", "remaining_time": "0:02:03", "throughput": 2508.37, "total_tokens": 8181808}
|
| 4321 |
+
{"current_steps": 21510, "total_steps": 22320, "loss": 0.1123, "lr": 2.0081483409935542e-07, "epoch": 19.274193548387096, "percentage": 96.37, "elapsed_time": "0:54:22", "remaining_time": "0:02:02", "throughput": 2508.4, "total_tokens": 8183696}
|
| 4322 |
+
{"current_steps": 21515, "total_steps": 22320, "loss": 0.272, "lr": 1.9834959796144192e-07, "epoch": 19.27867383512545, "percentage": 96.39, "elapsed_time": "0:54:23", "remaining_time": "0:02:02", "throughput": 2508.43, "total_tokens": 8185680}
|
| 4323 |
+
{"current_steps": 21520, "total_steps": 22320, "loss": 0.0905, "lr": 1.9589952698064461e-07, "epoch": 19.2831541218638, "percentage": 96.42, "elapsed_time": "0:54:23", "remaining_time": "0:02:01", "throughput": 2508.44, "total_tokens": 8187504}
|
| 4324 |
+
{"current_steps": 21525, "total_steps": 22320, "loss": 0.1905, "lr": 1.9346462265507072e-07, "epoch": 19.287634408602152, "percentage": 96.44, "elapsed_time": "0:54:24", "remaining_time": "0:02:00", "throughput": 2508.46, "total_tokens": 8189360}
|
| 4325 |
+
{"current_steps": 21530, "total_steps": 22320, "loss": 0.0378, "lr": 1.9104488647356533e-07, "epoch": 19.2921146953405, "percentage": 96.46, "elapsed_time": "0:54:25", "remaining_time": "0:01:59", "throughput": 2508.45, "total_tokens": 8191344}
|
| 4326 |
+
{"current_steps": 21535, "total_steps": 22320, "loss": 0.0857, "lr": 1.8864031991569775e-07, "epoch": 19.296594982078854, "percentage": 96.48, "elapsed_time": "0:54:26", "remaining_time": "0:01:59", "throughput": 2508.51, "total_tokens": 8193264}
|
| 4327 |
+
{"current_steps": 21540, "total_steps": 22320, "loss": 0.2118, "lr": 1.8625092445175296e-07, "epoch": 19.301075268817204, "percentage": 96.51, "elapsed_time": "0:54:26", "remaining_time": "0:01:58", "throughput": 2508.52, "total_tokens": 8194992}
|
| 4328 |
+
{"current_steps": 21545, "total_steps": 22320, "loss": 0.064, "lr": 1.838767015427456e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:54:27", "remaining_time": "0:01:57", "throughput": 2508.54, "total_tokens": 8196880}
|
| 4329 |
+
{"current_steps": 21550, "total_steps": 22320, "loss": 0.185, "lr": 1.8151765264041443e-07, "epoch": 19.310035842293907, "percentage": 96.55, "elapsed_time": "0:54:28", "remaining_time": "0:01:56", "throughput": 2508.56, "total_tokens": 8198672}
|
| 4330 |
+
{"current_steps": 21555, "total_steps": 22320, "loss": 0.0816, "lr": 1.7917377918721668e-07, "epoch": 19.31451612903226, "percentage": 96.57, "elapsed_time": "0:54:29", "remaining_time": "0:01:56", "throughput": 2508.57, "total_tokens": 8200592}
|
| 4331 |
+
{"current_steps": 21560, "total_steps": 22320, "loss": 0.0551, "lr": 1.768450826163337e-07, "epoch": 19.31899641577061, "percentage": 96.59, "elapsed_time": "0:54:29", "remaining_time": "0:01:55", "throughput": 2508.6, "total_tokens": 8202480}
|
| 4332 |
+
{"current_steps": 21565, "total_steps": 22320, "loss": 0.0962, "lr": 1.7453156435165986e-07, "epoch": 19.32347670250896, "percentage": 96.62, "elapsed_time": "0:54:30", "remaining_time": "0:01:54", "throughput": 2508.61, "total_tokens": 8204304}
|
| 4333 |
+
{"current_steps": 21570, "total_steps": 22320, "loss": 0.1223, "lr": 1.722332258078163e-07, "epoch": 19.327956989247312, "percentage": 96.64, "elapsed_time": "0:54:31", "remaining_time": "0:01:53", "throughput": 2508.63, "total_tokens": 8206192}
|
| 4334 |
+
{"current_steps": 21575, "total_steps": 22320, "loss": 0.1944, "lr": 1.6995006839014006e-07, "epoch": 19.33243727598566, "percentage": 96.66, "elapsed_time": "0:54:31", "remaining_time": "0:01:52", "throughput": 2508.67, "total_tokens": 8208176}
|
| 4335 |
+
{"current_steps": 21580, "total_steps": 22320, "loss": 0.2817, "lr": 1.6768209349468656e-07, "epoch": 19.336917562724015, "percentage": 96.68, "elapsed_time": "0:54:32", "remaining_time": "0:01:52", "throughput": 2508.68, "total_tokens": 8209936}
|
| 4336 |
+
{"current_steps": 21585, "total_steps": 22320, "loss": 0.0396, "lr": 1.6542930250822152e-07, "epoch": 19.341397849462364, "percentage": 96.71, "elapsed_time": "0:54:33", "remaining_time": "0:01:51", "throughput": 2508.71, "total_tokens": 8211824}
|
| 4337 |
+
{"current_steps": 21590, "total_steps": 22320, "loss": 0.0076, "lr": 1.6319169680823477e-07, "epoch": 19.345878136200717, "percentage": 96.73, "elapsed_time": "0:54:34", "remaining_time": "0:01:50", "throughput": 2508.7, "total_tokens": 8213616}
|
| 4338 |
+
{"current_steps": 21595, "total_steps": 22320, "loss": 0.0401, "lr": 1.6096927776292624e-07, "epoch": 19.350358422939067, "percentage": 96.75, "elapsed_time": "0:54:34", "remaining_time": "0:01:49", "throughput": 2508.73, "total_tokens": 8215504}
|
| 4339 |
+
{"current_steps": 21600, "total_steps": 22320, "loss": 0.1769, "lr": 1.5876204673121176e-07, "epoch": 19.35483870967742, "percentage": 96.77, "elapsed_time": "0:54:35", "remaining_time": "0:01:49", "throughput": 2508.78, "total_tokens": 8217488}
|
| 4340 |
+
{"current_steps": 21605, "total_steps": 22320, "loss": 0.0994, "lr": 1.5657000506271723e-07, "epoch": 19.35931899641577, "percentage": 96.8, "elapsed_time": "0:54:36", "remaining_time": "0:01:48", "throughput": 2508.79, "total_tokens": 8219472}
|
| 4341 |
+
{"current_steps": 21610, "total_steps": 22320, "loss": 0.0643, "lr": 1.5439315409778442e-07, "epoch": 19.363799283154123, "percentage": 96.82, "elapsed_time": "0:54:37", "remaining_time": "0:01:47", "throughput": 2508.82, "total_tokens": 8221424}
|
| 4342 |
+
{"current_steps": 21615, "total_steps": 22320, "loss": 0.1135, "lr": 1.5223149516746527e-07, "epoch": 19.368279569892472, "percentage": 96.84, "elapsed_time": "0:54:37", "remaining_time": "0:01:46", "throughput": 2508.85, "total_tokens": 8223312}
|
| 4343 |
+
{"current_steps": 21620, "total_steps": 22320, "loss": 0.2107, "lr": 1.5008502959352465e-07, "epoch": 19.372759856630825, "percentage": 96.86, "elapsed_time": "0:54:38", "remaining_time": "0:01:46", "throughput": 2508.88, "total_tokens": 8225360}
|
| 4344 |
+
{"current_steps": 21625, "total_steps": 22320, "loss": 0.1746, "lr": 1.4795375868842941e-07, "epoch": 19.377240143369175, "percentage": 96.89, "elapsed_time": "0:54:39", "remaining_time": "0:01:45", "throughput": 2508.92, "total_tokens": 8227472}
|
| 4345 |
+
{"current_steps": 21630, "total_steps": 22320, "loss": 0.2519, "lr": 1.4583768375536765e-07, "epoch": 19.381720430107528, "percentage": 96.91, "elapsed_time": "0:54:40", "remaining_time": "0:01:44", "throughput": 2508.93, "total_tokens": 8229456}
|
| 4346 |
+
{"current_steps": 21635, "total_steps": 22320, "loss": 0.0782, "lr": 1.437368060882266e-07, "epoch": 19.386200716845877, "percentage": 96.93, "elapsed_time": "0:54:40", "remaining_time": "0:01:43", "throughput": 2508.93, "total_tokens": 8231312}
|
| 4347 |
+
{"current_steps": 21640, "total_steps": 22320, "loss": 0.0535, "lr": 1.4165112697160366e-07, "epoch": 19.39068100358423, "percentage": 96.95, "elapsed_time": "0:54:41", "remaining_time": "0:01:43", "throughput": 2508.95, "total_tokens": 8233200}
|
| 4348 |
+
{"current_steps": 21645, "total_steps": 22320, "loss": 0.1168, "lr": 1.3958064768080646e-07, "epoch": 19.39516129032258, "percentage": 96.98, "elapsed_time": "0:54:42", "remaining_time": "0:01:42", "throughput": 2508.99, "total_tokens": 8235184}
|
| 4349 |
+
{"current_steps": 21650, "total_steps": 22320, "loss": 0.1594, "lr": 1.3752536948183903e-07, "epoch": 19.399641577060933, "percentage": 97.0, "elapsed_time": "0:54:42", "remaining_time": "0:01:41", "throughput": 2509.01, "total_tokens": 8237040}
|
| 4350 |
+
{"current_steps": 21655, "total_steps": 22320, "loss": 0.0659, "lr": 1.3548529363142103e-07, "epoch": 19.404121863799283, "percentage": 97.02, "elapsed_time": "0:54:43", "remaining_time": "0:01:40", "throughput": 2509.05, "total_tokens": 8238896}
|
| 4351 |
+
{"current_steps": 21660, "total_steps": 22320, "loss": 0.0334, "lr": 1.3346042137697135e-07, "epoch": 19.408602150537636, "percentage": 97.04, "elapsed_time": "0:54:44", "remaining_time": "0:01:40", "throughput": 2509.08, "total_tokens": 8240944}
|
| 4352 |
+
{"current_steps": 21665, "total_steps": 22320, "loss": 0.1171, "lr": 1.3145075395661345e-07, "epoch": 19.413082437275985, "percentage": 97.07, "elapsed_time": "0:54:45", "remaining_time": "0:01:39", "throughput": 2509.1, "total_tokens": 8242832}
|
| 4353 |
+
{"current_steps": 21670, "total_steps": 22320, "loss": 0.021, "lr": 1.2945629259917546e-07, "epoch": 19.41756272401434, "percentage": 97.09, "elapsed_time": "0:54:45", "remaining_time": "0:01:38", "throughput": 2509.12, "total_tokens": 8244624}
|
| 4354 |
+
{"current_steps": 21675, "total_steps": 22320, "loss": 0.3755, "lr": 1.2747703852418192e-07, "epoch": 19.422043010752688, "percentage": 97.11, "elapsed_time": "0:54:46", "remaining_time": "0:01:37", "throughput": 2509.14, "total_tokens": 8246416}
|
| 4355 |
+
{"current_steps": 21680, "total_steps": 22320, "loss": 0.0535, "lr": 1.2551299294186468e-07, "epoch": 19.42652329749104, "percentage": 97.13, "elapsed_time": "0:54:47", "remaining_time": "0:01:37", "throughput": 2509.16, "total_tokens": 8248368}
|
| 4356 |
+
{"current_steps": 21685, "total_steps": 22320, "loss": 0.0216, "lr": 1.2356415705315483e-07, "epoch": 19.43100358422939, "percentage": 97.16, "elapsed_time": "0:54:48", "remaining_time": "0:01:36", "throughput": 2509.16, "total_tokens": 8250160}
|
| 4357 |
+
{"current_steps": 21690, "total_steps": 22320, "loss": 0.051, "lr": 1.2163053204968521e-07, "epoch": 19.43548387096774, "percentage": 97.18, "elapsed_time": "0:54:48", "remaining_time": "0:01:35", "throughput": 2509.21, "total_tokens": 8252144}
|
| 4358 |
+
{"current_steps": 21695, "total_steps": 22320, "loss": 0.1598, "lr": 1.1971211911378232e-07, "epoch": 19.439964157706093, "percentage": 97.2, "elapsed_time": "0:54:49", "remaining_time": "0:01:34", "throughput": 2509.24, "total_tokens": 8254032}
|
| 4359 |
+
{"current_steps": 21700, "total_steps": 22320, "loss": 0.1335, "lr": 1.178089194184745e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:54:50", "remaining_time": "0:01:34", "throughput": 2509.26, "total_tokens": 8255824}
|
| 4360 |
+
{"current_steps": 21705, "total_steps": 22320, "loss": 0.1949, "lr": 1.159209341274864e-07, "epoch": 19.448924731182796, "percentage": 97.24, "elapsed_time": "0:54:50", "remaining_time": "0:01:33", "throughput": 2509.28, "total_tokens": 8257776}
|
| 4361 |
+
{"current_steps": 21710, "total_steps": 22320, "loss": 0.0557, "lr": 1.140481643952418e-07, "epoch": 19.453405017921146, "percentage": 97.27, "elapsed_time": "0:54:51", "remaining_time": "0:01:32", "throughput": 2509.3, "total_tokens": 8259632}
|
| 4362 |
+
{"current_steps": 21715, "total_steps": 22320, "loss": 0.2386, "lr": 1.121906113668636e-07, "epoch": 19.4578853046595, "percentage": 97.29, "elapsed_time": "0:54:52", "remaining_time": "0:01:31", "throughput": 2509.32, "total_tokens": 8261552}
|
| 4363 |
+
{"current_steps": 21720, "total_steps": 22320, "loss": 0.1068, "lr": 1.1034827617816545e-07, "epoch": 19.462365591397848, "percentage": 97.31, "elapsed_time": "0:54:53", "remaining_time": "0:01:30", "throughput": 2509.3, "total_tokens": 8263312}
|
| 4364 |
+
{"current_steps": 21725, "total_steps": 22320, "loss": 0.1378, "lr": 1.0852115995565182e-07, "epoch": 19.4668458781362, "percentage": 97.33, "elapsed_time": "0:54:53", "remaining_time": "0:01:30", "throughput": 2509.31, "total_tokens": 8265136}
|
| 4365 |
+
{"current_steps": 21730, "total_steps": 22320, "loss": 0.1492, "lr": 1.0670926381653179e-07, "epoch": 19.47132616487455, "percentage": 97.36, "elapsed_time": "0:54:54", "remaining_time": "0:01:29", "throughput": 2509.31, "total_tokens": 8266928}
|
| 4366 |
+
{"current_steps": 21735, "total_steps": 22320, "loss": 0.1371, "lr": 1.049125888686997e-07, "epoch": 19.475806451612904, "percentage": 97.38, "elapsed_time": "0:54:55", "remaining_time": "0:01:28", "throughput": 2509.35, "total_tokens": 8268848}
|
| 4367 |
+
{"current_steps": 21740, "total_steps": 22320, "loss": 0.1616, "lr": 1.0313113621075177e-07, "epoch": 19.480286738351253, "percentage": 97.4, "elapsed_time": "0:54:55", "remaining_time": "0:01:27", "throughput": 2509.38, "total_tokens": 8270768}
|
| 4368 |
+
{"current_steps": 21745, "total_steps": 22320, "loss": 0.1293, "lr": 1.0136490693196665e-07, "epoch": 19.484767025089607, "percentage": 97.42, "elapsed_time": "0:54:56", "remaining_time": "0:01:27", "throughput": 2509.36, "total_tokens": 8272432}
|
| 4369 |
+
{"current_steps": 21750, "total_steps": 22320, "loss": 0.1344, "lr": 9.961390211231658e-08, "epoch": 19.489247311827956, "percentage": 97.45, "elapsed_time": "0:54:57", "remaining_time": "0:01:26", "throughput": 2509.41, "total_tokens": 8274384}
|
| 4370 |
+
{"current_steps": 21755, "total_steps": 22320, "loss": 0.2565, "lr": 9.787812282247011e-08, "epoch": 19.49372759856631, "percentage": 97.47, "elapsed_time": "0:54:58", "remaining_time": "0:01:25", "throughput": 2509.42, "total_tokens": 8276304}
|
| 4371 |
+
{"current_steps": 21760, "total_steps": 22320, "loss": 0.3086, "lr": 9.615757012378101e-08, "epoch": 19.49820788530466, "percentage": 97.49, "elapsed_time": "0:54:58", "remaining_time": "0:01:24", "throughput": 2509.44, "total_tokens": 8278256}
|
| 4372 |
+
{"current_steps": 21765, "total_steps": 22320, "loss": 0.0547, "lr": 9.445224506829664e-08, "epoch": 19.502688172043012, "percentage": 97.51, "elapsed_time": "0:54:59", "remaining_time": "0:01:24", "throughput": 2509.47, "total_tokens": 8280272}
|
| 4373 |
+
{"current_steps": 21770, "total_steps": 22320, "loss": 0.1869, "lr": 9.276214869874678e-08, "epoch": 19.50716845878136, "percentage": 97.54, "elapsed_time": "0:55:00", "remaining_time": "0:01:23", "throughput": 2509.48, "total_tokens": 8282128}
|
| 4374 |
+
{"current_steps": 21775, "total_steps": 22320, "loss": 0.1882, "lr": 9.108728204855754e-08, "epoch": 19.511648745519715, "percentage": 97.56, "elapsed_time": "0:55:01", "remaining_time": "0:01:22", "throughput": 2509.51, "total_tokens": 8284112}
|
| 4375 |
+
{"current_steps": 21780, "total_steps": 22320, "loss": 0.0404, "lr": 8.942764614183752e-08, "epoch": 19.516129032258064, "percentage": 97.58, "elapsed_time": "0:55:01", "remaining_time": "0:01:21", "throughput": 2509.52, "total_tokens": 8286032}
|
| 4376 |
+
{"current_steps": 21785, "total_steps": 22320, "loss": 0.0109, "lr": 8.778324199338329e-08, "epoch": 19.520609318996417, "percentage": 97.6, "elapsed_time": "0:55:02", "remaining_time": "0:01:21", "throughput": 2509.55, "total_tokens": 8287920}
|
| 4377 |
+
{"current_steps": 21790, "total_steps": 22320, "loss": 0.1301, "lr": 8.615407060867664e-08, "epoch": 19.525089605734767, "percentage": 97.63, "elapsed_time": "0:55:03", "remaining_time": "0:01:20", "throughput": 2509.62, "total_tokens": 8290128}
|
| 4378 |
+
{"current_steps": 21795, "total_steps": 22320, "loss": 0.2891, "lr": 8.454013298389018e-08, "epoch": 19.52956989247312, "percentage": 97.65, "elapsed_time": "0:55:04", "remaining_time": "0:01:19", "throughput": 2509.62, "total_tokens": 8291856}
|
| 4379 |
+
{"current_steps": 21800, "total_steps": 22320, "loss": 0.2344, "lr": 8.294143010587896e-08, "epoch": 19.53405017921147, "percentage": 97.67, "elapsed_time": "0:55:04", "remaining_time": "0:01:18", "throughput": 2509.64, "total_tokens": 8293712}
|
| 4380 |
+
{"current_steps": 21805, "total_steps": 22320, "loss": 0.1651, "lr": 8.135796295217768e-08, "epoch": 19.538530465949822, "percentage": 97.69, "elapsed_time": "0:55:05", "remaining_time": "0:01:18", "throughput": 2509.63, "total_tokens": 8295504}
|
| 4381 |
+
{"current_steps": 21810, "total_steps": 22320, "loss": 0.1649, "lr": 7.978973249101185e-08, "epoch": 19.543010752688172, "percentage": 97.72, "elapsed_time": "0:55:06", "remaining_time": "0:01:17", "throughput": 2509.69, "total_tokens": 8297424}
|
| 4382 |
+
{"current_steps": 21815, "total_steps": 22320, "loss": 0.169, "lr": 7.823673968128665e-08, "epoch": 19.547491039426525, "percentage": 97.74, "elapsed_time": "0:55:06", "remaining_time": "0:01:16", "throughput": 2509.69, "total_tokens": 8299280}
|
| 4383 |
+
{"current_steps": 21820, "total_steps": 22320, "loss": 0.2177, "lr": 7.669898547258969e-08, "epoch": 19.551971326164875, "percentage": 97.76, "elapsed_time": "0:55:07", "remaining_time": "0:01:15", "throughput": 2509.73, "total_tokens": 8301392}
|
| 4384 |
+
{"current_steps": 21825, "total_steps": 22320, "loss": 0.2316, "lr": 7.51764708051994e-08, "epoch": 19.556451612903224, "percentage": 97.78, "elapsed_time": "0:55:08", "remaining_time": "0:01:15", "throughput": 2509.74, "total_tokens": 8303248}
|
| 4385 |
+
{"current_steps": 21830, "total_steps": 22320, "loss": 0.3046, "lr": 7.366919661006278e-08, "epoch": 19.560931899641577, "percentage": 97.8, "elapsed_time": "0:55:09", "remaining_time": "0:01:14", "throughput": 2509.74, "total_tokens": 8305104}
|
| 4386 |
+
{"current_steps": 21835, "total_steps": 22320, "loss": 0.2296, "lr": 7.217716380881479e-08, "epoch": 19.565412186379927, "percentage": 97.83, "elapsed_time": "0:55:09", "remaining_time": "0:01:13", "throughput": 2509.77, "total_tokens": 8307088}
|
| 4387 |
+
{"current_steps": 21840, "total_steps": 22320, "loss": 0.1472, "lr": 7.07003733137701e-08, "epoch": 19.56989247311828, "percentage": 97.85, "elapsed_time": "0:55:10", "remaining_time": "0:01:12", "throughput": 2509.79, "total_tokens": 8308976}
|
| 4388 |
+
{"current_steps": 21845, "total_steps": 22320, "loss": 0.1487, "lr": 6.923882602792586e-08, "epoch": 19.57437275985663, "percentage": 97.87, "elapsed_time": "0:55:11", "remaining_time": "0:01:12", "throughput": 2509.8, "total_tokens": 8310736}
|
| 4389 |
+
{"current_steps": 21850, "total_steps": 22320, "loss": 0.0414, "lr": 6.77925228449533e-08, "epoch": 19.578853046594983, "percentage": 97.89, "elapsed_time": "0:55:12", "remaining_time": "0:01:11", "throughput": 2509.86, "total_tokens": 8312784}
|
| 4390 |
+
{"current_steps": 21855, "total_steps": 22320, "loss": 0.1737, "lr": 6.636146464920889e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:55:12", "remaining_time": "0:01:10", "throughput": 2509.91, "total_tokens": 8314832}
|
| 4391 |
+
{"current_steps": 21860, "total_steps": 22320, "loss": 0.164, "lr": 6.494565231572048e-08, "epoch": 19.587813620071685, "percentage": 97.94, "elapsed_time": "0:55:13", "remaining_time": "0:01:09", "throughput": 2509.94, "total_tokens": 8316880}
|
| 4392 |
+
{"current_steps": 21865, "total_steps": 22320, "loss": 0.1757, "lr": 6.354508671019832e-08, "epoch": 19.592293906810035, "percentage": 97.96, "elapsed_time": "0:55:14", "remaining_time": "0:01:08", "throughput": 2509.94, "total_tokens": 8318768}
|
| 4393 |
+
{"current_steps": 21870, "total_steps": 22320, "loss": 0.0747, "lr": 6.215976868902962e-08, "epoch": 19.596774193548388, "percentage": 97.98, "elapsed_time": "0:55:15", "remaining_time": "0:01:08", "throughput": 2509.99, "total_tokens": 8320912}
|
| 4394 |
+
{"current_steps": 21875, "total_steps": 22320, "loss": 0.1032, "lr": 6.078969909927845e-08, "epoch": 19.601254480286737, "percentage": 98.01, "elapsed_time": "0:55:15", "remaining_time": "0:01:07", "throughput": 2510.01, "total_tokens": 8322768}
|
| 4395 |
+
{"current_steps": 21880, "total_steps": 22320, "loss": 0.1944, "lr": 5.9434878778683036e-08, "epoch": 19.60573476702509, "percentage": 98.03, "elapsed_time": "0:55:16", "remaining_time": "0:01:06", "throughput": 2510.04, "total_tokens": 8324592}
|
| 4396 |
+
{"current_steps": 21885, "total_steps": 22320, "loss": 0.0379, "lr": 5.809530855565848e-08, "epoch": 19.61021505376344, "percentage": 98.05, "elapsed_time": "0:55:17", "remaining_time": "0:01:05", "throughput": 2510.07, "total_tokens": 8326480}
|
| 4397 |
+
{"current_steps": 21890, "total_steps": 22320, "loss": 0.2194, "lr": 5.6770989249294024e-08, "epoch": 19.614695340501793, "percentage": 98.07, "elapsed_time": "0:55:17", "remaining_time": "0:01:05", "throughput": 2510.08, "total_tokens": 8328336}
|
| 4398 |
+
{"current_steps": 21895, "total_steps": 22320, "loss": 0.0227, "lr": 5.54619216693586e-08, "epoch": 19.619175627240143, "percentage": 98.1, "elapsed_time": "0:55:18", "remaining_time": "0:01:04", "throughput": 2510.1, "total_tokens": 8330128}
|
| 4399 |
+
{"current_steps": 21900, "total_steps": 22320, "loss": 0.154, "lr": 5.4168106616286905e-08, "epoch": 19.623655913978496, "percentage": 98.12, "elapsed_time": "0:55:19", "remaining_time": "0:01:03", "throughput": 2510.12, "total_tokens": 8331920}
|
| 4400 |
+
{"current_steps": 21905, "total_steps": 22320, "loss": 0.1092, "lr": 5.2889544881193355e-08, "epoch": 19.628136200716845, "percentage": 98.14, "elapsed_time": "0:55:20", "remaining_time": "0:01:02", "throughput": 2510.15, "total_tokens": 8333744}
|
| 4401 |
+
{"current_steps": 21910, "total_steps": 22320, "loss": 0.14, "lr": 5.162623724586646e-08, "epoch": 19.6326164874552, "percentage": 98.16, "elapsed_time": "0:55:20", "remaining_time": "0:01:02", "throughput": 2510.17, "total_tokens": 8335600}
|
| 4402 |
+
{"current_steps": 21915, "total_steps": 22320, "loss": 0.0299, "lr": 5.037818448276055e-08, "epoch": 19.637096774193548, "percentage": 98.19, "elapsed_time": "0:55:21", "remaining_time": "0:01:01", "throughput": 2510.2, "total_tokens": 8337488}
|
| 4403 |
+
{"current_steps": 21920, "total_steps": 22320, "loss": 0.1109, "lr": 4.9145387355012395e-08, "epoch": 19.6415770609319, "percentage": 98.21, "elapsed_time": "0:55:22", "remaining_time": "0:01:00", "throughput": 2510.24, "total_tokens": 8339408}
|
| 4404 |
+
{"current_steps": 21925, "total_steps": 22320, "loss": 0.1433, "lr": 4.7927846616424576e-08, "epoch": 19.64605734767025, "percentage": 98.23, "elapsed_time": "0:55:22", "remaining_time": "0:00:59", "throughput": 2510.27, "total_tokens": 8341456}
|
| 4405 |
+
{"current_steps": 21930, "total_steps": 22320, "loss": 0.2887, "lr": 4.6725563011465465e-08, "epoch": 19.650537634408604, "percentage": 98.25, "elapsed_time": "0:55:23", "remaining_time": "0:00:59", "throughput": 2510.29, "total_tokens": 8343408}
|
| 4406 |
+
{"current_steps": 21935, "total_steps": 22320, "loss": 0.0029, "lr": 4.5538537275288675e-08, "epoch": 19.655017921146953, "percentage": 98.28, "elapsed_time": "0:55:24", "remaining_time": "0:00:58", "throughput": 2510.3, "total_tokens": 8345168}
|
| 4407 |
+
{"current_steps": 21940, "total_steps": 22320, "loss": 0.1786, "lr": 4.43667701337025e-08, "epoch": 19.659498207885306, "percentage": 98.3, "elapsed_time": "0:55:25", "remaining_time": "0:00:57", "throughput": 2510.32, "total_tokens": 8347024}
|
| 4408 |
+
{"current_steps": 21945, "total_steps": 22320, "loss": 0.2689, "lr": 4.321026230319769e-08, "epoch": 19.663978494623656, "percentage": 98.32, "elapsed_time": "0:55:25", "remaining_time": "0:00:56", "throughput": 2510.35, "total_tokens": 8349136}
|
| 4409 |
+
{"current_steps": 21950, "total_steps": 22320, "loss": 0.1521, "lr": 4.2069014490930816e-08, "epoch": 19.668458781362006, "percentage": 98.34, "elapsed_time": "0:55:26", "remaining_time": "0:00:56", "throughput": 2510.37, "total_tokens": 8351056}
|
| 4410 |
+
{"current_steps": 21955, "total_steps": 22320, "loss": 0.1018, "lr": 4.0943027394721446e-08, "epoch": 19.67293906810036, "percentage": 98.36, "elapsed_time": "0:55:27", "remaining_time": "0:00:55", "throughput": 2510.39, "total_tokens": 8352848}
|
| 4411 |
+
{"current_steps": 21960, "total_steps": 22320, "loss": 0.1807, "lr": 3.983230170306607e-08, "epoch": 19.677419354838708, "percentage": 98.39, "elapsed_time": "0:55:27", "remaining_time": "0:00:54", "throughput": 2510.4, "total_tokens": 8354608}
|
| 4412 |
+
{"current_steps": 21965, "total_steps": 22320, "loss": 0.069, "lr": 3.873683809512696e-08, "epoch": 19.68189964157706, "percentage": 98.41, "elapsed_time": "0:55:28", "remaining_time": "0:00:53", "throughput": 2510.44, "total_tokens": 8356528}
|
| 4413 |
+
{"current_steps": 21970, "total_steps": 22320, "loss": 0.0326, "lr": 3.765663724073221e-08, "epoch": 19.68637992831541, "percentage": 98.43, "elapsed_time": "0:55:29", "remaining_time": "0:00:53", "throughput": 2510.45, "total_tokens": 8358352}
|
| 4414 |
+
{"current_steps": 21975, "total_steps": 22320, "loss": 0.071, "lr": 3.659169980038124e-08, "epoch": 19.690860215053764, "percentage": 98.45, "elapsed_time": "0:55:30", "remaining_time": "0:00:52", "throughput": 2510.46, "total_tokens": 8360144}
|
| 4415 |
+
{"current_steps": 21980, "total_steps": 22320, "loss": 0.1011, "lr": 3.5542026425233744e-08, "epoch": 19.695340501792113, "percentage": 98.48, "elapsed_time": "0:55:30", "remaining_time": "0:00:51", "throughput": 2510.47, "total_tokens": 8361904}
|
| 4416 |
+
{"current_steps": 21985, "total_steps": 22320, "loss": 0.2053, "lr": 3.4507617757123524e-08, "epoch": 19.699820788530467, "percentage": 98.5, "elapsed_time": "0:55:31", "remaining_time": "0:00:50", "throughput": 2510.51, "total_tokens": 8363888}
|
| 4417 |
+
{"current_steps": 21990, "total_steps": 22320, "loss": 0.0296, "lr": 3.348847442854741e-08, "epoch": 19.704301075268816, "percentage": 98.52, "elapsed_time": "0:55:32", "remaining_time": "0:00:50", "throughput": 2510.54, "total_tokens": 8365808}
|
| 4418 |
+
{"current_steps": 21995, "total_steps": 22320, "loss": 0.1937, "lr": 3.248459706266527e-08, "epoch": 19.70878136200717, "percentage": 98.54, "elapsed_time": "0:55:32", "remaining_time": "0:00:49", "throughput": 2510.58, "total_tokens": 8367760}
|
| 4419 |
+
{"current_steps": 22000, "total_steps": 22320, "loss": 0.1847, "lr": 3.149598627331107e-08, "epoch": 19.71326164874552, "percentage": 98.57, "elapsed_time": "0:55:33", "remaining_time": "0:00:48", "throughput": 2510.57, "total_tokens": 8369520}
|
| 4420 |
+
{"current_steps": 22005, "total_steps": 22320, "loss": 0.0852, "lr": 3.052264266497351e-08, "epoch": 19.717741935483872, "percentage": 98.59, "elapsed_time": "0:55:34", "remaining_time": "0:00:47", "throughput": 2510.42, "total_tokens": 8371312}
|
| 4421 |
+
{"current_steps": 22010, "total_steps": 22320, "loss": 0.0783, "lr": 2.9564566832815388e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:55:35", "remaining_time": "0:00:46", "throughput": 2510.43, "total_tokens": 8373072}
|
| 4422 |
+
{"current_steps": 22015, "total_steps": 22320, "loss": 0.258, "lr": 2.862175936265421e-08, "epoch": 19.726702508960575, "percentage": 98.63, "elapsed_time": "0:55:36", "remaining_time": "0:00:46", "throughput": 2510.47, "total_tokens": 8374992}
|
| 4423 |
+
{"current_steps": 22020, "total_steps": 22320, "loss": 0.2937, "lr": 2.7694220830978834e-08, "epoch": 19.731182795698924, "percentage": 98.66, "elapsed_time": "0:55:36", "remaining_time": "0:00:45", "throughput": 2510.53, "total_tokens": 8377040}
|
| 4424 |
+
{"current_steps": 22025, "total_steps": 22320, "loss": 0.248, "lr": 2.6781951804943915e-08, "epoch": 19.735663082437277, "percentage": 98.68, "elapsed_time": "0:55:37", "remaining_time": "0:00:44", "throughput": 2510.53, "total_tokens": 8378992}
|
| 4425 |
+
{"current_steps": 22030, "total_steps": 22320, "loss": 0.3085, "lr": 2.5884952842356036e-08, "epoch": 19.740143369175627, "percentage": 98.7, "elapsed_time": "0:55:38", "remaining_time": "0:00:43", "throughput": 2510.53, "total_tokens": 8380784}
|
| 4426 |
+
{"current_steps": 22035, "total_steps": 22320, "loss": 0.1309, "lr": 2.5003224491695897e-08, "epoch": 19.74462365591398, "percentage": 98.72, "elapsed_time": "0:55:38", "remaining_time": "0:00:43", "throughput": 2510.57, "total_tokens": 8382736}
|
| 4427 |
+
{"current_steps": 22040, "total_steps": 22320, "loss": 0.0193, "lr": 2.4136767292104455e-08, "epoch": 19.74910394265233, "percentage": 98.75, "elapsed_time": "0:55:39", "remaining_time": "0:00:42", "throughput": 2510.61, "total_tokens": 8384656}
|
| 4428 |
+
{"current_steps": 22045, "total_steps": 22320, "loss": 0.0261, "lr": 2.3285581773382913e-08, "epoch": 19.753584229390682, "percentage": 98.77, "elapsed_time": "0:55:40", "remaining_time": "0:00:41", "throughput": 2510.65, "total_tokens": 8386736}
|
| 4429 |
+
{"current_steps": 22050, "total_steps": 22320, "loss": 0.2728, "lr": 2.2449668455989946e-08, "epoch": 19.758064516129032, "percentage": 98.79, "elapsed_time": "0:55:41", "remaining_time": "0:00:40", "throughput": 2510.68, "total_tokens": 8388624}
|
| 4430 |
+
{"current_steps": 22055, "total_steps": 22320, "loss": 0.3378, "lr": 2.162902785105836e-08, "epoch": 19.762544802867385, "percentage": 98.81, "elapsed_time": "0:55:41", "remaining_time": "0:00:40", "throughput": 2510.68, "total_tokens": 8390480}
|
| 4431 |
+
{"current_steps": 22060, "total_steps": 22320, "loss": 0.3833, "lr": 2.08236604603701e-08, "epoch": 19.767025089605735, "percentage": 98.84, "elapsed_time": "0:55:42", "remaining_time": "0:00:39", "throughput": 2510.71, "total_tokens": 8392400}
|
| 4432 |
+
{"current_steps": 22065, "total_steps": 22320, "loss": 0.1006, "lr": 2.003356677637569e-08, "epoch": 19.771505376344088, "percentage": 98.86, "elapsed_time": "0:55:43", "remaining_time": "0:00:38", "throughput": 2510.74, "total_tokens": 8394288}
|
| 4433 |
+
{"current_steps": 22070, "total_steps": 22320, "loss": 0.0875, "lr": 1.9258747282183133e-08, "epoch": 19.775985663082437, "percentage": 98.88, "elapsed_time": "0:55:44", "remaining_time": "0:00:37", "throughput": 2510.77, "total_tokens": 8396272}
|
| 4434 |
+
{"current_steps": 22075, "total_steps": 22320, "loss": 0.1026, "lr": 1.849920245156067e-08, "epoch": 19.780465949820787, "percentage": 98.9, "elapsed_time": "0:55:44", "remaining_time": "0:00:37", "throughput": 2510.77, "total_tokens": 8398224}
|
| 4435 |
+
{"current_steps": 22080, "total_steps": 22320, "loss": 0.0859, "lr": 1.7754932748939578e-08, "epoch": 19.78494623655914, "percentage": 98.92, "elapsed_time": "0:55:45", "remaining_time": "0:00:36", "throughput": 2510.8, "total_tokens": 8400208}
|
| 4436 |
+
{"current_steps": 22085, "total_steps": 22320, "loss": 0.3024, "lr": 1.70259386294086e-08, "epoch": 19.78942652329749, "percentage": 98.95, "elapsed_time": "0:55:46", "remaining_time": "0:00:35", "throughput": 2510.83, "total_tokens": 8402192}
|
| 4437 |
+
{"current_steps": 22090, "total_steps": 22320, "loss": 0.0703, "lr": 1.6312220538716728e-08, "epoch": 19.793906810035843, "percentage": 98.97, "elapsed_time": "0:55:47", "remaining_time": "0:00:34", "throughput": 2510.86, "total_tokens": 8404144}
|
| 4438 |
+
{"current_steps": 22095, "total_steps": 22320, "loss": 0.1746, "lr": 1.5613778913270426e-08, "epoch": 19.798387096774192, "percentage": 98.99, "elapsed_time": "0:55:47", "remaining_time": "0:00:34", "throughput": 2510.9, "total_tokens": 8406256}
|
| 4439 |
+
{"current_steps": 22100, "total_steps": 22320, "loss": 0.1918, "lr": 1.493061418013919e-08, "epoch": 19.802867383512545, "percentage": 99.01, "elapsed_time": "0:55:48", "remaining_time": "0:00:33", "throughput": 2510.91, "total_tokens": 8408208}
|
| 4440 |
+
{"current_steps": 22105, "total_steps": 22320, "loss": 0.2665, "lr": 1.4262726757049982e-08, "epoch": 19.807347670250895, "percentage": 99.04, "elapsed_time": "0:55:49", "remaining_time": "0:00:32", "throughput": 2510.91, "total_tokens": 8409936}
|
| 4441 |
+
{"current_steps": 22110, "total_steps": 22320, "loss": 0.1896, "lr": 1.3610117052384463e-08, "epoch": 19.811827956989248, "percentage": 99.06, "elapsed_time": "0:55:50", "remaining_time": "0:00:31", "throughput": 2510.94, "total_tokens": 8411760}
|
| 4442 |
+
{"current_steps": 22115, "total_steps": 22320, "loss": 0.0798, "lr": 1.2972785465190097e-08, "epoch": 19.816308243727597, "percentage": 99.08, "elapsed_time": "0:55:50", "remaining_time": "0:00:31", "throughput": 2510.96, "total_tokens": 8413712}
|
| 4443 |
+
{"current_steps": 22120, "total_steps": 22320, "loss": 0.2088, "lr": 1.2350732385163488e-08, "epoch": 19.82078853046595, "percentage": 99.1, "elapsed_time": "0:55:51", "remaining_time": "0:00:30", "throughput": 2510.97, "total_tokens": 8415536}
|
| 4444 |
+
{"current_steps": 22125, "total_steps": 22320, "loss": 0.1307, "lr": 1.1743958192667048e-08, "epoch": 19.8252688172043, "percentage": 99.13, "elapsed_time": "0:55:52", "remaining_time": "0:00:29", "throughput": 2510.99, "total_tokens": 8417552}
|
| 4445 |
+
{"current_steps": 22130, "total_steps": 22320, "loss": 0.3285, "lr": 1.1152463258715106e-08, "epoch": 19.829749103942653, "percentage": 99.15, "elapsed_time": "0:55:53", "remaining_time": "0:00:28", "throughput": 2511.0, "total_tokens": 8419376}
|
| 4446 |
+
{"current_steps": 22135, "total_steps": 22320, "loss": 0.0881, "lr": 1.0576247944985018e-08, "epoch": 19.834229390681003, "percentage": 99.17, "elapsed_time": "0:55:53", "remaining_time": "0:00:28", "throughput": 2511.03, "total_tokens": 8421296}
|
| 4447 |
+
{"current_steps": 22140, "total_steps": 22320, "loss": 0.2789, "lr": 1.001531260380606e-08, "epoch": 19.838709677419356, "percentage": 99.19, "elapsed_time": "0:55:54", "remaining_time": "0:00:27", "throughput": 2511.03, "total_tokens": 8423152}
|
| 4448 |
+
{"current_steps": 22145, "total_steps": 22320, "loss": 0.0612, "lr": 9.469657578164981e-09, "epoch": 19.843189964157705, "percentage": 99.22, "elapsed_time": "0:55:55", "remaining_time": "0:00:26", "throughput": 2511.05, "total_tokens": 8425008}
|
| 4449 |
+
{"current_steps": 22150, "total_steps": 22320, "loss": 0.0613, "lr": 8.93928320170878e-09, "epoch": 19.84767025089606, "percentage": 99.24, "elapsed_time": "0:55:55", "remaining_time": "0:00:25", "throughput": 2511.09, "total_tokens": 8426864}
|
| 4450 |
+
{"current_steps": 22155, "total_steps": 22320, "loss": 0.041, "lr": 8.424189798739158e-09, "epoch": 19.852150537634408, "percentage": 99.26, "elapsed_time": "0:55:56", "remaining_time": "0:00:24", "throughput": 2511.08, "total_tokens": 8428784}
|
| 4451 |
+
{"current_steps": 22160, "total_steps": 22320, "loss": 0.0983, "lr": 7.924377684212504e-09, "epoch": 19.85663082437276, "percentage": 99.28, "elapsed_time": "0:55:57", "remaining_time": "0:00:24", "throughput": 2511.11, "total_tokens": 8430736}
|
| 4452 |
+
{"current_steps": 22165, "total_steps": 22320, "loss": 0.0259, "lr": 7.43984716373991e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:55:58", "remaining_time": "0:00:23", "throughput": 2511.13, "total_tokens": 8432624}
|
| 4453 |
+
{"current_steps": 22170, "total_steps": 22320, "loss": 0.0838, "lr": 6.970598533598272e-09, "epoch": 19.865591397849464, "percentage": 99.33, "elapsed_time": "0:55:58", "remaining_time": "0:00:22", "throughput": 2511.15, "total_tokens": 8434480}
|
| 4454 |
+
{"current_steps": 22175, "total_steps": 22320, "loss": 0.0114, "lr": 6.5166320807080735e-09, "epoch": 19.870071684587813, "percentage": 99.35, "elapsed_time": "0:55:59", "remaining_time": "0:00:21", "throughput": 2511.17, "total_tokens": 8436336}
|
| 4455 |
+
{"current_steps": 22180, "total_steps": 22320, "loss": 0.3346, "lr": 6.077948082650054e-09, "epoch": 19.874551971326166, "percentage": 99.37, "elapsed_time": "0:56:00", "remaining_time": "0:00:21", "throughput": 2511.2, "total_tokens": 8438256}
|
| 4456 |
+
{"current_steps": 22185, "total_steps": 22320, "loss": 0.0649, "lr": 5.654546807665195e-09, "epoch": 19.879032258064516, "percentage": 99.4, "elapsed_time": "0:56:00", "remaining_time": "0:00:20", "throughput": 2511.2, "total_tokens": 8439984}
|
| 4457 |
+
{"current_steps": 22190, "total_steps": 22320, "loss": 0.2364, "lr": 5.246428514640856e-09, "epoch": 19.88351254480287, "percentage": 99.42, "elapsed_time": "0:56:01", "remaining_time": "0:00:19", "throughput": 2511.21, "total_tokens": 8441808}
|
| 4458 |
+
{"current_steps": 22195, "total_steps": 22320, "loss": 0.1426, "lr": 4.853593453127414e-09, "epoch": 19.88799283154122, "percentage": 99.44, "elapsed_time": "0:56:02", "remaining_time": "0:00:18", "throughput": 2511.24, "total_tokens": 8443696}
|
| 4459 |
+
{"current_steps": 22200, "total_steps": 22320, "loss": 0.0501, "lr": 4.476041863327174e-09, "epoch": 19.892473118279568, "percentage": 99.46, "elapsed_time": "0:56:03", "remaining_time": "0:00:18", "throughput": 2511.27, "total_tokens": 8445584}
|
| 4460 |
+
{"current_steps": 22205, "total_steps": 22320, "loss": 0.2526, "lr": 4.113773976091584e-09, "epoch": 19.89695340501792, "percentage": 99.48, "elapsed_time": "0:56:03", "remaining_time": "0:00:17", "throughput": 2511.29, "total_tokens": 8447536}
|
| 4461 |
+
{"current_steps": 22210, "total_steps": 22320, "loss": 0.0391, "lr": 3.766790012937893e-09, "epoch": 19.90143369175627, "percentage": 99.51, "elapsed_time": "0:56:04", "remaining_time": "0:00:16", "throughput": 2511.34, "total_tokens": 8449424}
|
| 4462 |
+
{"current_steps": 22215, "total_steps": 22320, "loss": 0.3938, "lr": 3.4350901860297214e-09, "epoch": 19.905913978494624, "percentage": 99.53, "elapsed_time": "0:56:05", "remaining_time": "0:00:15", "throughput": 2511.38, "total_tokens": 8451344}
|
| 4463 |
+
{"current_steps": 22220, "total_steps": 22320, "loss": 0.1437, "lr": 3.118674698188162e-09, "epoch": 19.910394265232974, "percentage": 99.55, "elapsed_time": "0:56:05", "remaining_time": "0:00:15", "throughput": 2511.39, "total_tokens": 8453264}
|
| 4464 |
+
{"current_steps": 22225, "total_steps": 22320, "loss": 0.1785, "lr": 2.8175437428862307e-09, "epoch": 19.914874551971327, "percentage": 99.57, "elapsed_time": "0:56:06", "remaining_time": "0:00:14", "throughput": 2511.42, "total_tokens": 8455184}
|
| 4465 |
+
{"current_steps": 22230, "total_steps": 22320, "loss": 0.1042, "lr": 2.5316975042544156e-09, "epoch": 19.919354838709676, "percentage": 99.6, "elapsed_time": "0:56:07", "remaining_time": "0:00:13", "throughput": 2511.45, "total_tokens": 8457072}
|
| 4466 |
+
{"current_steps": 22235, "total_steps": 22320, "loss": 0.0257, "lr": 2.2611361570751276e-09, "epoch": 19.92383512544803, "percentage": 99.62, "elapsed_time": "0:56:08", "remaining_time": "0:00:12", "throughput": 2511.48, "total_tokens": 8458960}
|
| 4467 |
+
{"current_steps": 22240, "total_steps": 22320, "loss": 0.0607, "lr": 2.0058598667854756e-09, "epoch": 19.92831541218638, "percentage": 99.64, "elapsed_time": "0:56:08", "remaining_time": "0:00:12", "throughput": 2511.52, "total_tokens": 8460944}
|
| 4468 |
+
{"current_steps": 22245, "total_steps": 22320, "loss": 0.1013, "lr": 1.7658687894744897e-09, "epoch": 19.932795698924732, "percentage": 99.66, "elapsed_time": "0:56:09", "remaining_time": "0:00:11", "throughput": 2511.56, "total_tokens": 8462992}
|
| 4469 |
+
{"current_steps": 22250, "total_steps": 22320, "loss": 0.1935, "lr": 1.5411630718886739e-09, "epoch": 19.93727598566308, "percentage": 99.69, "elapsed_time": "0:56:10", "remaining_time": "0:00:10", "throughput": 2511.57, "total_tokens": 8464816}
|
| 4470 |
+
{"current_steps": 22255, "total_steps": 22320, "loss": 0.1165, "lr": 1.3317428514236784e-09, "epoch": 19.941756272401435, "percentage": 99.71, "elapsed_time": "0:56:11", "remaining_time": "0:00:09", "throughput": 2511.58, "total_tokens": 8466608}
|
| 4471 |
+
{"current_steps": 22260, "total_steps": 22320, "loss": 0.1306, "lr": 1.137608256135403e-09, "epoch": 19.946236559139784, "percentage": 99.73, "elapsed_time": "0:56:11", "remaining_time": "0:00:09", "throughput": 2511.59, "total_tokens": 8468464}
|
| 4472 |
+
{"current_steps": 22265, "total_steps": 22320, "loss": 0.1009, "lr": 9.587594047233416e-10, "epoch": 19.950716845878137, "percentage": 99.75, "elapsed_time": "0:56:12", "remaining_time": "0:00:08", "throughput": 2511.6, "total_tokens": 8470384}
|
| 4473 |
+
{"current_steps": 22270, "total_steps": 22320, "loss": 0.2564, "lr": 7.95196406550014e-10, "epoch": 19.955197132616487, "percentage": 99.78, "elapsed_time": "0:56:13", "remaining_time": "0:00:07", "throughput": 2511.62, "total_tokens": 8472176}
|
| 4474 |
+
{"current_steps": 22275, "total_steps": 22320, "loss": 0.454, "lr": 6.469193616243097e-10, "epoch": 19.95967741935484, "percentage": 99.8, "elapsed_time": "0:56:13", "remaining_time": "0:00:06", "throughput": 2511.64, "total_tokens": 8473968}
|
| 4475 |
+
{"current_steps": 22280, "total_steps": 22320, "loss": 0.1703, "lr": 5.139283606125922e-10, "epoch": 19.96415770609319, "percentage": 99.82, "elapsed_time": "0:56:14", "remaining_time": "0:00:06", "throughput": 2511.66, "total_tokens": 8475888}
|
| 4476 |
+
{"current_steps": 22285, "total_steps": 22320, "loss": 0.016, "lr": 3.9622348483592254e-10, "epoch": 19.968637992831543, "percentage": 99.84, "elapsed_time": "0:56:15", "remaining_time": "0:00:05", "throughput": 2511.68, "total_tokens": 8477904}
|
| 4477 |
+
{"current_steps": 22290, "total_steps": 22320, "loss": 0.0802, "lr": 2.9380480626173267e-10, "epoch": 19.973118279569892, "percentage": 99.87, "elapsed_time": "0:56:16", "remaining_time": "0:00:04", "throughput": 2511.71, "total_tokens": 8479792}
|
| 4478 |
+
{"current_steps": 22295, "total_steps": 22320, "loss": 0.014, "lr": 2.0667238751770346e-10, "epoch": 19.977598566308245, "percentage": 99.89, "elapsed_time": "0:56:16", "remaining_time": "0:00:03", "throughput": 2511.71, "total_tokens": 8481584}
|
| 4479 |
+
{"current_steps": 22300, "total_steps": 22320, "loss": 0.3991, "lr": 1.348262818778867e-10, "epoch": 19.982078853046595, "percentage": 99.91, "elapsed_time": "0:56:17", "remaining_time": "0:00:03", "throughput": 2511.76, "total_tokens": 8483696}
|
| 4480 |
+
{"current_steps": 22305, "total_steps": 22320, "loss": 0.0582, "lr": 7.826653327658306e-11, "epoch": 19.986559139784948, "percentage": 99.93, "elapsed_time": "0:56:18", "remaining_time": "0:00:02", "throughput": 2511.81, "total_tokens": 8485680}
|
| 4481 |
+
{"current_steps": 22310, "total_steps": 22320, "loss": 0.1921, "lr": 3.699317629723975e-11, "epoch": 19.991039426523297, "percentage": 99.96, "elapsed_time": "0:56:19", "remaining_time": "0:00:01", "throughput": 2511.86, "total_tokens": 8487856}
|
| 4482 |
+
{"current_steps": 22315, "total_steps": 22320, "loss": 0.1144, "lr": 1.1006236178001672e-11, "epoch": 19.99551971326165, "percentage": 99.98, "elapsed_time": "0:56:19", "remaining_time": "0:00:00", "throughput": 2511.87, "total_tokens": 8489712}
|
| 4483 |
+
{"current_steps": 22320, "total_steps": 22320, "loss": 0.0498, "lr": 3.0572880338475275e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:56:20", "remaining_time": "0:00:00", "throughput": 2511.78, "total_tokens": 8491296}
|
| 4484 |
+
{"current_steps": 22320, "total_steps": 22320, "eval_loss": 0.20898781716823578, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:56:26", "remaining_time": "0:00:00", "throughput": 2507.64, "total_tokens": 8491296}
|
| 4485 |
+
{"current_steps": 22320, "total_steps": 22320, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:56:26", "remaining_time": "0:00:00", "throughput": 2507.03, "total_tokens": 8491296}
|