Training in progress, step 21998
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +259 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 541712
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f31887a5940edac7d1b5482e054bc330bd28eb30003dda5ee9bfdbc743d6a2ba
|
| 3 |
size 541712
|
trainer_log.jsonl
CHANGED
|
@@ -4163,3 +4163,262 @@
|
|
| 4163 |
{"current_steps": 20735, "total_steps": 25880, "loss": 5.0235, "lr": 5.7840221540490234e-06, "epoch": 16.023956723338486, "percentage": 80.12, "elapsed_time": "0:47:04", "remaining_time": "0:11:40", "throughput": 2471.45, "total_tokens": 6979472}
|
| 4164 |
{"current_steps": 20740, "total_steps": 25880, "loss": 4.4022, "lr": 5.773241585801676e-06, "epoch": 16.027820710973725, "percentage": 80.14, "elapsed_time": "0:47:04", "remaining_time": "0:11:40", "throughput": 2471.46, "total_tokens": 6981296}
|
| 4165 |
{"current_steps": 20745, "total_steps": 25880, "loss": 4.6204, "lr": 5.762469761999201e-06, "epoch": 16.031684698608963, "percentage": 80.16, "elapsed_time": "0:47:05", "remaining_time": "0:11:39", "throughput": 2471.41, "total_tokens": 6982672}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4163 |
{"current_steps": 20735, "total_steps": 25880, "loss": 5.0235, "lr": 5.7840221540490234e-06, "epoch": 16.023956723338486, "percentage": 80.12, "elapsed_time": "0:47:04", "remaining_time": "0:11:40", "throughput": 2471.45, "total_tokens": 6979472}
|
| 4164 |
{"current_steps": 20740, "total_steps": 25880, "loss": 4.4022, "lr": 5.773241585801676e-06, "epoch": 16.027820710973725, "percentage": 80.14, "elapsed_time": "0:47:04", "remaining_time": "0:11:40", "throughput": 2471.46, "total_tokens": 6981296}
|
| 4165 |
{"current_steps": 20745, "total_steps": 25880, "loss": 4.6204, "lr": 5.762469761999201e-06, "epoch": 16.031684698608963, "percentage": 80.16, "elapsed_time": "0:47:05", "remaining_time": "0:11:39", "throughput": 2471.41, "total_tokens": 6982672}
|
| 4166 |
+
{"current_steps": 20750, "total_steps": 25880, "loss": 4.3999, "lr": 5.751706687540679e-06, "epoch": 16.035548686244205, "percentage": 80.18, "elapsed_time": "0:47:06", "remaining_time": "0:11:38", "throughput": 2471.41, "total_tokens": 6984240}
|
| 4167 |
+
{"current_steps": 20755, "total_steps": 25880, "loss": 4.715, "lr": 5.740952367321237e-06, "epoch": 16.039412673879443, "percentage": 80.2, "elapsed_time": "0:47:06", "remaining_time": "0:11:37", "throughput": 2471.4, "total_tokens": 6985744}
|
| 4168 |
+
{"current_steps": 20760, "total_steps": 25880, "loss": 4.6712, "lr": 5.7302068062319965e-06, "epoch": 16.043276661514682, "percentage": 80.22, "elapsed_time": "0:47:07", "remaining_time": "0:11:37", "throughput": 2471.43, "total_tokens": 6987440}
|
| 4169 |
+
{"current_steps": 20765, "total_steps": 25880, "loss": 4.5066, "lr": 5.719470009160102e-06, "epoch": 16.047140649149924, "percentage": 80.24, "elapsed_time": "0:47:07", "remaining_time": "0:11:36", "throughput": 2471.48, "total_tokens": 6989104}
|
| 4170 |
+
{"current_steps": 20770, "total_steps": 25880, "loss": 4.6272, "lr": 5.708741980988708e-06, "epoch": 16.051004636785162, "percentage": 80.26, "elapsed_time": "0:47:08", "remaining_time": "0:11:35", "throughput": 2471.47, "total_tokens": 6990704}
|
| 4171 |
+
{"current_steps": 20775, "total_steps": 25880, "loss": 4.7501, "lr": 5.698022726596996e-06, "epoch": 16.0548686244204, "percentage": 80.27, "elapsed_time": "0:47:09", "remaining_time": "0:11:35", "throughput": 2471.51, "total_tokens": 6992432}
|
| 4172 |
+
{"current_steps": 20780, "total_steps": 25880, "loss": 4.6089, "lr": 5.687312250860147e-06, "epoch": 16.058732612055643, "percentage": 80.29, "elapsed_time": "0:47:09", "remaining_time": "0:11:34", "throughput": 2471.5, "total_tokens": 6993904}
|
| 4173 |
+
{"current_steps": 20785, "total_steps": 25880, "loss": 4.4664, "lr": 5.676610558649337e-06, "epoch": 16.06259659969088, "percentage": 80.31, "elapsed_time": "0:47:10", "remaining_time": "0:11:33", "throughput": 2471.53, "total_tokens": 6995632}
|
| 4174 |
+
{"current_steps": 20790, "total_steps": 25880, "loss": 4.9613, "lr": 5.665917654831773e-06, "epoch": 16.06646058732612, "percentage": 80.33, "elapsed_time": "0:47:11", "remaining_time": "0:11:33", "throughput": 2471.55, "total_tokens": 6997392}
|
| 4175 |
+
{"current_steps": 20795, "total_steps": 25880, "loss": 4.5305, "lr": 5.655233544270649e-06, "epoch": 16.07032457496136, "percentage": 80.35, "elapsed_time": "0:47:11", "remaining_time": "0:11:32", "throughput": 2471.58, "total_tokens": 6999088}
|
| 4176 |
+
{"current_steps": 20800, "total_steps": 25880, "loss": 4.7636, "lr": 5.644558231825162e-06, "epoch": 16.0741885625966, "percentage": 80.37, "elapsed_time": "0:47:12", "remaining_time": "0:11:31", "throughput": 2471.63, "total_tokens": 7001040}
|
| 4177 |
+
{"current_steps": 20805, "total_steps": 25880, "loss": 4.8087, "lr": 5.633891722350504e-06, "epoch": 16.07805255023184, "percentage": 80.39, "elapsed_time": "0:47:13", "remaining_time": "0:11:31", "throughput": 2471.7, "total_tokens": 7002960}
|
| 4178 |
+
{"current_steps": 20810, "total_steps": 25880, "loss": 4.5693, "lr": 5.623234020697868e-06, "epoch": 16.08191653786708, "percentage": 80.41, "elapsed_time": "0:47:13", "remaining_time": "0:11:30", "throughput": 2471.79, "total_tokens": 7004848}
|
| 4179 |
+
{"current_steps": 20815, "total_steps": 25880, "loss": 4.907, "lr": 5.612585131714437e-06, "epoch": 16.08578052550232, "percentage": 80.43, "elapsed_time": "0:47:14", "remaining_time": "0:11:29", "throughput": 2471.81, "total_tokens": 7006448}
|
| 4180 |
+
{"current_steps": 20820, "total_steps": 25880, "loss": 4.8093, "lr": 5.601945060243397e-06, "epoch": 16.089644513137557, "percentage": 80.45, "elapsed_time": "0:47:15", "remaining_time": "0:11:29", "throughput": 2471.84, "total_tokens": 7008144}
|
| 4181 |
+
{"current_steps": 20825, "total_steps": 25880, "loss": 4.9181, "lr": 5.591313811123919e-06, "epoch": 16.0935085007728, "percentage": 80.47, "elapsed_time": "0:47:15", "remaining_time": "0:11:28", "throughput": 2471.91, "total_tokens": 7009904}
|
| 4182 |
+
{"current_steps": 20830, "total_steps": 25880, "loss": 4.6536, "lr": 5.580691389191153e-06, "epoch": 16.097372488408038, "percentage": 80.49, "elapsed_time": "0:47:16", "remaining_time": "0:11:27", "throughput": 2471.94, "total_tokens": 7011696}
|
| 4183 |
+
{"current_steps": 20835, "total_steps": 25880, "loss": 4.8097, "lr": 5.570077799276241e-06, "epoch": 16.101236476043276, "percentage": 80.51, "elapsed_time": "0:47:17", "remaining_time": "0:11:27", "throughput": 2471.95, "total_tokens": 7013424}
|
| 4184 |
+
{"current_steps": 20840, "total_steps": 25880, "loss": 4.5795, "lr": 5.559473046206309e-06, "epoch": 16.105100463678518, "percentage": 80.53, "elapsed_time": "0:47:17", "remaining_time": "0:11:26", "throughput": 2471.96, "total_tokens": 7015152}
|
| 4185 |
+
{"current_steps": 20845, "total_steps": 25880, "loss": 4.2004, "lr": 5.548877134804459e-06, "epoch": 16.108964451313756, "percentage": 80.54, "elapsed_time": "0:47:18", "remaining_time": "0:11:25", "throughput": 2472.0, "total_tokens": 7016784}
|
| 4186 |
+
{"current_steps": 20850, "total_steps": 25880, "loss": 4.6596, "lr": 5.538290069889768e-06, "epoch": 16.112828438948995, "percentage": 80.56, "elapsed_time": "0:47:19", "remaining_time": "0:11:24", "throughput": 2471.98, "total_tokens": 7018352}
|
| 4187 |
+
{"current_steps": 20855, "total_steps": 25880, "loss": 4.52, "lr": 5.527711856277307e-06, "epoch": 16.116692426584233, "percentage": 80.58, "elapsed_time": "0:47:19", "remaining_time": "0:11:24", "throughput": 2472.02, "total_tokens": 7020176}
|
| 4188 |
+
{"current_steps": 20860, "total_steps": 25880, "loss": 4.9252, "lr": 5.5171424987781165e-06, "epoch": 16.120556414219475, "percentage": 80.6, "elapsed_time": "0:47:20", "remaining_time": "0:11:23", "throughput": 2472.05, "total_tokens": 7021968}
|
| 4189 |
+
{"current_steps": 20865, "total_steps": 25880, "loss": 4.7159, "lr": 5.506582002199193e-06, "epoch": 16.124420401854714, "percentage": 80.62, "elapsed_time": "0:47:21", "remaining_time": "0:11:22", "throughput": 2472.1, "total_tokens": 7023632}
|
| 4190 |
+
{"current_steps": 20870, "total_steps": 25880, "loss": 4.4184, "lr": 5.496030371343519e-06, "epoch": 16.128284389489952, "percentage": 80.64, "elapsed_time": "0:47:21", "remaining_time": "0:11:22", "throughput": 2472.13, "total_tokens": 7025424}
|
| 4191 |
+
{"current_steps": 20875, "total_steps": 25880, "loss": 4.9327, "lr": 5.485487611010034e-06, "epoch": 16.132148377125194, "percentage": 80.66, "elapsed_time": "0:47:22", "remaining_time": "0:11:21", "throughput": 2472.08, "total_tokens": 7026864}
|
| 4192 |
+
{"current_steps": 20880, "total_steps": 25880, "loss": 4.6602, "lr": 5.474953725993653e-06, "epoch": 16.136012364760433, "percentage": 80.68, "elapsed_time": "0:47:23", "remaining_time": "0:11:20", "throughput": 2472.07, "total_tokens": 7028368}
|
| 4193 |
+
{"current_steps": 20885, "total_steps": 25880, "loss": 4.6667, "lr": 5.46442872108524e-06, "epoch": 16.13987635239567, "percentage": 80.7, "elapsed_time": "0:47:23", "remaining_time": "0:11:20", "throughput": 2472.11, "total_tokens": 7030000}
|
| 4194 |
+
{"current_steps": 20890, "total_steps": 25880, "loss": 4.7069, "lr": 5.453912601071648e-06, "epoch": 16.143740340030913, "percentage": 80.72, "elapsed_time": "0:47:24", "remaining_time": "0:11:19", "throughput": 2472.15, "total_tokens": 7031728}
|
| 4195 |
+
{"current_steps": 20895, "total_steps": 25880, "loss": 4.3971, "lr": 5.443405370735655e-06, "epoch": 16.14760432766615, "percentage": 80.74, "elapsed_time": "0:47:25", "remaining_time": "0:11:18", "throughput": 2472.12, "total_tokens": 7033264}
|
| 4196 |
+
{"current_steps": 20900, "total_steps": 25880, "loss": 4.8128, "lr": 5.432907034856024e-06, "epoch": 16.15146831530139, "percentage": 80.76, "elapsed_time": "0:47:25", "remaining_time": "0:11:18", "throughput": 2472.19, "total_tokens": 7034992}
|
| 4197 |
+
{"current_steps": 20905, "total_steps": 25880, "loss": 4.8712, "lr": 5.4224175982074575e-06, "epoch": 16.155332302936632, "percentage": 80.78, "elapsed_time": "0:47:26", "remaining_time": "0:11:17", "throughput": 2472.25, "total_tokens": 7037040}
|
| 4198 |
+
{"current_steps": 20910, "total_steps": 25880, "loss": 4.6008, "lr": 5.411937065560613e-06, "epoch": 16.15919629057187, "percentage": 80.8, "elapsed_time": "0:47:27", "remaining_time": "0:11:16", "throughput": 2472.31, "total_tokens": 7038992}
|
| 4199 |
+
{"current_steps": 20915, "total_steps": 25880, "loss": 4.836, "lr": 5.401465441682099e-06, "epoch": 16.16306027820711, "percentage": 80.82, "elapsed_time": "0:47:27", "remaining_time": "0:11:16", "throughput": 2472.34, "total_tokens": 7040880}
|
| 4200 |
+
{"current_steps": 20920, "total_steps": 25880, "loss": 4.6234, "lr": 5.391002731334466e-06, "epoch": 16.16692426584235, "percentage": 80.83, "elapsed_time": "0:47:28", "remaining_time": "0:11:15", "throughput": 2472.36, "total_tokens": 7042640}
|
| 4201 |
+
{"current_steps": 20925, "total_steps": 25880, "loss": 4.9054, "lr": 5.380548939276231e-06, "epoch": 16.17078825347759, "percentage": 80.85, "elapsed_time": "0:47:29", "remaining_time": "0:11:14", "throughput": 2472.42, "total_tokens": 7044528}
|
| 4202 |
+
{"current_steps": 20930, "total_steps": 25880, "loss": 4.6685, "lr": 5.370104070261836e-06, "epoch": 16.174652241112828, "percentage": 80.87, "elapsed_time": "0:47:29", "remaining_time": "0:11:14", "throughput": 2472.44, "total_tokens": 7046256}
|
| 4203 |
+
{"current_steps": 20935, "total_steps": 25880, "loss": 4.7407, "lr": 5.359668129041662e-06, "epoch": 16.17851622874807, "percentage": 80.89, "elapsed_time": "0:47:30", "remaining_time": "0:11:13", "throughput": 2472.48, "total_tokens": 7048016}
|
| 4204 |
+
{"current_steps": 20940, "total_steps": 25880, "loss": 4.4198, "lr": 5.34924112036205e-06, "epoch": 16.182380216383308, "percentage": 80.91, "elapsed_time": "0:47:31", "remaining_time": "0:11:12", "throughput": 2472.53, "total_tokens": 7049776}
|
| 4205 |
+
{"current_steps": 20945, "total_steps": 25880, "loss": 4.7706, "lr": 5.338823048965261e-06, "epoch": 16.186244204018546, "percentage": 80.93, "elapsed_time": "0:47:31", "remaining_time": "0:11:11", "throughput": 2472.57, "total_tokens": 7051600}
|
| 4206 |
+
{"current_steps": 20950, "total_steps": 25880, "loss": 4.7997, "lr": 5.3284139195894924e-06, "epoch": 16.19010819165379, "percentage": 80.95, "elapsed_time": "0:47:32", "remaining_time": "0:11:11", "throughput": 2472.63, "total_tokens": 7053680}
|
| 4207 |
+
{"current_steps": 20955, "total_steps": 25880, "loss": 5.1195, "lr": 5.318013736968877e-06, "epoch": 16.193972179289027, "percentage": 80.97, "elapsed_time": "0:47:33", "remaining_time": "0:11:10", "throughput": 2472.68, "total_tokens": 7055440}
|
| 4208 |
+
{"current_steps": 20960, "total_steps": 25880, "loss": 4.5336, "lr": 5.307622505833493e-06, "epoch": 16.197836166924265, "percentage": 80.99, "elapsed_time": "0:47:33", "remaining_time": "0:11:09", "throughput": 2472.69, "total_tokens": 7057008}
|
| 4209 |
+
{"current_steps": 20965, "total_steps": 25880, "loss": 4.7871, "lr": 5.297240230909326e-06, "epoch": 16.201700154559504, "percentage": 81.01, "elapsed_time": "0:47:34", "remaining_time": "0:11:09", "throughput": 2472.74, "total_tokens": 7058768}
|
| 4210 |
+
{"current_steps": 20970, "total_steps": 25880, "loss": 4.3984, "lr": 5.2868669169182955e-06, "epoch": 16.205564142194746, "percentage": 81.03, "elapsed_time": "0:47:35", "remaining_time": "0:11:08", "throughput": 2472.76, "total_tokens": 7060464}
|
| 4211 |
+
{"current_steps": 20975, "total_steps": 25880, "loss": 4.6323, "lr": 5.2765025685782425e-06, "epoch": 16.209428129829984, "percentage": 81.05, "elapsed_time": "0:47:35", "remaining_time": "0:11:07", "throughput": 2472.76, "total_tokens": 7062000}
|
| 4212 |
+
{"current_steps": 20980, "total_steps": 25880, "loss": 4.7277, "lr": 5.266147190602949e-06, "epoch": 16.213292117465222, "percentage": 81.07, "elapsed_time": "0:47:36", "remaining_time": "0:11:07", "throughput": 2472.75, "total_tokens": 7063568}
|
| 4213 |
+
{"current_steps": 20985, "total_steps": 25880, "loss": 4.9312, "lr": 5.255800787702095e-06, "epoch": 16.217156105100464, "percentage": 81.09, "elapsed_time": "0:47:37", "remaining_time": "0:11:06", "throughput": 2472.83, "total_tokens": 7065520}
|
| 4214 |
+
{"current_steps": 20990, "total_steps": 25880, "loss": 4.4758, "lr": 5.245463364581277e-06, "epoch": 16.221020092735703, "percentage": 81.11, "elapsed_time": "0:47:37", "remaining_time": "0:11:05", "throughput": 2472.86, "total_tokens": 7067216}
|
| 4215 |
+
{"current_steps": 20995, "total_steps": 25880, "loss": 4.539, "lr": 5.235134925942034e-06, "epoch": 16.22488408037094, "percentage": 81.12, "elapsed_time": "0:47:38", "remaining_time": "0:11:05", "throughput": 2472.87, "total_tokens": 7068784}
|
| 4216 |
+
{"current_steps": 21000, "total_steps": 25880, "loss": 4.7359, "lr": 5.2248154764817925e-06, "epoch": 16.228748068006183, "percentage": 81.14, "elapsed_time": "0:47:39", "remaining_time": "0:11:04", "throughput": 2472.81, "total_tokens": 7070096}
|
| 4217 |
+
{"current_steps": 21005, "total_steps": 25880, "loss": 4.961, "lr": 5.214505020893903e-06, "epoch": 16.23261205564142, "percentage": 81.16, "elapsed_time": "0:47:39", "remaining_time": "0:11:03", "throughput": 2472.86, "total_tokens": 7072016}
|
| 4218 |
+
{"current_steps": 21010, "total_steps": 25880, "loss": 4.5432, "lr": 5.204203563867619e-06, "epoch": 16.23647604327666, "percentage": 81.18, "elapsed_time": "0:47:40", "remaining_time": "0:11:03", "throughput": 2472.88, "total_tokens": 7073584}
|
| 4219 |
+
{"current_steps": 21015, "total_steps": 25880, "loss": 4.8192, "lr": 5.193911110088101e-06, "epoch": 16.240340030911902, "percentage": 81.2, "elapsed_time": "0:47:41", "remaining_time": "0:11:02", "throughput": 2472.97, "total_tokens": 7075600}
|
| 4220 |
+
{"current_steps": 21020, "total_steps": 25880, "loss": 4.8759, "lr": 5.183627664236429e-06, "epoch": 16.24420401854714, "percentage": 81.22, "elapsed_time": "0:47:41", "remaining_time": "0:11:01", "throughput": 2473.02, "total_tokens": 7077264}
|
| 4221 |
+
{"current_steps": 21025, "total_steps": 25880, "loss": 4.6153, "lr": 5.173353230989567e-06, "epoch": 16.24806800618238, "percentage": 81.24, "elapsed_time": "0:47:42", "remaining_time": "0:11:01", "throughput": 2473.07, "total_tokens": 7079312}
|
| 4222 |
+
{"current_steps": 21030, "total_steps": 25880, "loss": 4.6863, "lr": 5.163087815020398e-06, "epoch": 16.25193199381762, "percentage": 81.26, "elapsed_time": "0:47:43", "remaining_time": "0:11:00", "throughput": 2473.12, "total_tokens": 7081072}
|
| 4223 |
+
{"current_steps": 21035, "total_steps": 25880, "loss": 4.6627, "lr": 5.152831420997689e-06, "epoch": 16.25579598145286, "percentage": 81.28, "elapsed_time": "0:47:43", "remaining_time": "0:10:59", "throughput": 2473.16, "total_tokens": 7082896}
|
| 4224 |
+
{"current_steps": 21040, "total_steps": 25880, "loss": 4.8223, "lr": 5.1425840535861106e-06, "epoch": 16.259659969088098, "percentage": 81.3, "elapsed_time": "0:47:44", "remaining_time": "0:10:58", "throughput": 2473.18, "total_tokens": 7084560}
|
| 4225 |
+
{"current_steps": 21045, "total_steps": 25880, "loss": 4.7063, "lr": 5.132345717446227e-06, "epoch": 16.26352395672334, "percentage": 81.32, "elapsed_time": "0:47:45", "remaining_time": "0:10:58", "throughput": 2473.13, "total_tokens": 7085936}
|
| 4226 |
+
{"current_steps": 21050, "total_steps": 25880, "loss": 4.7695, "lr": 5.12211641723449e-06, "epoch": 16.26738794435858, "percentage": 81.34, "elapsed_time": "0:47:45", "remaining_time": "0:10:57", "throughput": 2473.13, "total_tokens": 7087536}
|
| 4227 |
+
{"current_steps": 21055, "total_steps": 25880, "loss": 4.873, "lr": 5.111896157603246e-06, "epoch": 16.271251931993817, "percentage": 81.36, "elapsed_time": "0:47:46", "remaining_time": "0:10:56", "throughput": 2473.16, "total_tokens": 7089360}
|
| 4228 |
+
{"current_steps": 21060, "total_steps": 25880, "loss": 4.6015, "lr": 5.101684943200735e-06, "epoch": 16.27511591962906, "percentage": 81.38, "elapsed_time": "0:47:47", "remaining_time": "0:10:56", "throughput": 2473.22, "total_tokens": 7091312}
|
| 4229 |
+
{"current_steps": 21065, "total_steps": 25880, "loss": 4.7518, "lr": 5.091482778671086e-06, "epoch": 16.278979907264297, "percentage": 81.39, "elapsed_time": "0:47:47", "remaining_time": "0:10:55", "throughput": 2473.24, "total_tokens": 7092880}
|
| 4230 |
+
{"current_steps": 21070, "total_steps": 25880, "loss": 4.8082, "lr": 5.081289668654296e-06, "epoch": 16.282843894899536, "percentage": 81.41, "elapsed_time": "0:47:48", "remaining_time": "0:10:54", "throughput": 2473.3, "total_tokens": 7094768}
|
| 4231 |
+
{"current_steps": 21075, "total_steps": 25880, "loss": 4.7727, "lr": 5.071105617786251e-06, "epoch": 16.286707882534778, "percentage": 81.43, "elapsed_time": "0:47:49", "remaining_time": "0:10:54", "throughput": 2473.26, "total_tokens": 7096176}
|
| 4232 |
+
{"current_steps": 21080, "total_steps": 25880, "loss": 4.4675, "lr": 5.060930630698724e-06, "epoch": 16.290571870170016, "percentage": 81.45, "elapsed_time": "0:47:49", "remaining_time": "0:10:53", "throughput": 2473.22, "total_tokens": 7097552}
|
| 4233 |
+
{"current_steps": 21085, "total_steps": 25880, "loss": 5.0859, "lr": 5.050764712019354e-06, "epoch": 16.294435857805254, "percentage": 81.47, "elapsed_time": "0:47:50", "remaining_time": "0:10:52", "throughput": 2473.24, "total_tokens": 7099312}
|
| 4234 |
+
{"current_steps": 21090, "total_steps": 25880, "loss": 4.5749, "lr": 5.040607866371658e-06, "epoch": 16.298299845440496, "percentage": 81.49, "elapsed_time": "0:47:51", "remaining_time": "0:10:52", "throughput": 2473.23, "total_tokens": 7100912}
|
| 4235 |
+
{"current_steps": 21095, "total_steps": 25880, "loss": 4.7375, "lr": 5.030460098375037e-06, "epoch": 16.302163833075735, "percentage": 81.51, "elapsed_time": "0:47:51", "remaining_time": "0:10:51", "throughput": 2473.2, "total_tokens": 7102320}
|
| 4236 |
+
{"current_steps": 21100, "total_steps": 25880, "loss": 4.4926, "lr": 5.0203214126447625e-06, "epoch": 16.306027820710973, "percentage": 81.53, "elapsed_time": "0:47:52", "remaining_time": "0:10:50", "throughput": 2473.21, "total_tokens": 7104144}
|
| 4237 |
+
{"current_steps": 21105, "total_steps": 25880, "loss": 4.7087, "lr": 5.010191813791962e-06, "epoch": 16.30989180834621, "percentage": 81.55, "elapsed_time": "0:47:53", "remaining_time": "0:10:50", "throughput": 2473.26, "total_tokens": 7105904}
|
| 4238 |
+
{"current_steps": 21110, "total_steps": 25880, "loss": 4.5305, "lr": 5.00007130642364e-06, "epoch": 16.313755795981454, "percentage": 81.57, "elapsed_time": "0:47:53", "remaining_time": "0:10:49", "throughput": 2473.31, "total_tokens": 7107760}
|
| 4239 |
+
{"current_steps": 21115, "total_steps": 25880, "loss": 4.7916, "lr": 4.989959895142663e-06, "epoch": 16.317619783616692, "percentage": 81.59, "elapsed_time": "0:47:54", "remaining_time": "0:10:48", "throughput": 2473.26, "total_tokens": 7109136}
|
| 4240 |
+
{"current_steps": 21120, "total_steps": 25880, "loss": 4.8256, "lr": 4.979857584547762e-06, "epoch": 16.32148377125193, "percentage": 81.61, "elapsed_time": "0:47:55", "remaining_time": "0:10:47", "throughput": 2473.29, "total_tokens": 7110832}
|
| 4241 |
+
{"current_steps": 21125, "total_steps": 25880, "loss": 4.6914, "lr": 4.969764379233518e-06, "epoch": 16.325347758887172, "percentage": 81.63, "elapsed_time": "0:47:55", "remaining_time": "0:10:47", "throughput": 2473.36, "total_tokens": 7112784}
|
| 4242 |
+
{"current_steps": 21130, "total_steps": 25880, "loss": 4.7862, "lr": 4.959680283790399e-06, "epoch": 16.32921174652241, "percentage": 81.65, "elapsed_time": "0:47:56", "remaining_time": "0:10:46", "throughput": 2473.41, "total_tokens": 7114448}
|
| 4243 |
+
{"current_steps": 21135, "total_steps": 25880, "loss": 4.6583, "lr": 4.9496053028046965e-06, "epoch": 16.33307573415765, "percentage": 81.67, "elapsed_time": "0:47:57", "remaining_time": "0:10:45", "throughput": 2473.46, "total_tokens": 7116304}
|
| 4244 |
+
{"current_steps": 21140, "total_steps": 25880, "loss": 4.6462, "lr": 4.939539440858587e-06, "epoch": 16.33693972179289, "percentage": 81.68, "elapsed_time": "0:47:57", "remaining_time": "0:10:45", "throughput": 2473.47, "total_tokens": 7117968}
|
| 4245 |
+
{"current_steps": 21145, "total_steps": 25880, "loss": 4.804, "lr": 4.929482702530078e-06, "epoch": 16.34080370942813, "percentage": 81.7, "elapsed_time": "0:47:58", "remaining_time": "0:10:44", "throughput": 2473.57, "total_tokens": 7119984}
|
| 4246 |
+
{"current_steps": 21150, "total_steps": 25880, "loss": 4.8935, "lr": 4.919435092393032e-06, "epoch": 16.344667697063368, "percentage": 81.72, "elapsed_time": "0:47:59", "remaining_time": "0:10:43", "throughput": 2473.6, "total_tokens": 7121712}
|
| 4247 |
+
{"current_steps": 21155, "total_steps": 25880, "loss": 4.4164, "lr": 4.909396615017164e-06, "epoch": 16.34853168469861, "percentage": 81.74, "elapsed_time": "0:47:59", "remaining_time": "0:10:43", "throughput": 2473.61, "total_tokens": 7123344}
|
| 4248 |
+
{"current_steps": 21160, "total_steps": 25880, "loss": 4.7971, "lr": 4.899367274968028e-06, "epoch": 16.35239567233385, "percentage": 81.76, "elapsed_time": "0:48:00", "remaining_time": "0:10:42", "throughput": 2473.63, "total_tokens": 7125136}
|
| 4249 |
+
{"current_steps": 21165, "total_steps": 25880, "loss": 5.0486, "lr": 4.889347076807038e-06, "epoch": 16.356259659969087, "percentage": 81.78, "elapsed_time": "0:48:01", "remaining_time": "0:10:41", "throughput": 2473.71, "total_tokens": 7126896}
|
| 4250 |
+
{"current_steps": 21170, "total_steps": 25880, "loss": 4.6641, "lr": 4.879336025091435e-06, "epoch": 16.36012364760433, "percentage": 81.8, "elapsed_time": "0:48:01", "remaining_time": "0:10:41", "throughput": 2473.74, "total_tokens": 7128784}
|
| 4251 |
+
{"current_steps": 21175, "total_steps": 25880, "loss": 4.4564, "lr": 4.869334124374303e-06, "epoch": 16.363987635239567, "percentage": 81.82, "elapsed_time": "0:48:02", "remaining_time": "0:10:40", "throughput": 2473.74, "total_tokens": 7130320}
|
| 4252 |
+
{"current_steps": 21180, "total_steps": 25880, "loss": 4.3407, "lr": 4.859341379204571e-06, "epoch": 16.367851622874806, "percentage": 81.84, "elapsed_time": "0:48:03", "remaining_time": "0:10:39", "throughput": 2473.74, "total_tokens": 7131856}
|
| 4253 |
+
{"current_steps": 21185, "total_steps": 25880, "loss": 5.1145, "lr": 4.849357794126999e-06, "epoch": 16.371715610510048, "percentage": 81.86, "elapsed_time": "0:48:03", "remaining_time": "0:10:39", "throughput": 2473.73, "total_tokens": 7133360}
|
| 4254 |
+
{"current_steps": 21190, "total_steps": 25880, "loss": 4.2135, "lr": 4.8393833736821795e-06, "epoch": 16.375579598145286, "percentage": 81.88, "elapsed_time": "0:48:04", "remaining_time": "0:10:38", "throughput": 2473.71, "total_tokens": 7134928}
|
| 4255 |
+
{"current_steps": 21195, "total_steps": 25880, "loss": 4.9818, "lr": 4.8294181224065345e-06, "epoch": 16.379443585780525, "percentage": 81.9, "elapsed_time": "0:48:04", "remaining_time": "0:10:37", "throughput": 2473.69, "total_tokens": 7136400}
|
| 4256 |
+
{"current_steps": 21200, "total_steps": 25880, "loss": 4.5745, "lr": 4.8194620448323294e-06, "epoch": 16.383307573415767, "percentage": 81.92, "elapsed_time": "0:48:05", "remaining_time": "0:10:37", "throughput": 2473.66, "total_tokens": 7137936}
|
| 4257 |
+
{"current_steps": 21205, "total_steps": 25880, "loss": 5.0588, "lr": 4.809515145487642e-06, "epoch": 16.387171561051005, "percentage": 81.94, "elapsed_time": "0:48:06", "remaining_time": "0:10:36", "throughput": 2473.71, "total_tokens": 7139632}
|
| 4258 |
+
{"current_steps": 21210, "total_steps": 25880, "loss": 4.512, "lr": 4.799577428896385e-06, "epoch": 16.391035548686244, "percentage": 81.96, "elapsed_time": "0:48:06", "remaining_time": "0:10:35", "throughput": 2473.76, "total_tokens": 7141520}
|
| 4259 |
+
{"current_steps": 21215, "total_steps": 25880, "loss": 4.6764, "lr": 4.789648899578278e-06, "epoch": 16.394899536321482, "percentage": 81.97, "elapsed_time": "0:48:07", "remaining_time": "0:10:34", "throughput": 2473.78, "total_tokens": 7143216}
|
| 4260 |
+
{"current_steps": 21220, "total_steps": 25880, "loss": 4.8282, "lr": 4.7797295620488954e-06, "epoch": 16.398763523956724, "percentage": 81.99, "elapsed_time": "0:48:08", "remaining_time": "0:10:34", "throughput": 2473.74, "total_tokens": 7144624}
|
| 4261 |
+
{"current_steps": 21225, "total_steps": 25880, "loss": 4.3335, "lr": 4.7698194208196045e-06, "epoch": 16.402627511591962, "percentage": 82.01, "elapsed_time": "0:48:08", "remaining_time": "0:10:33", "throughput": 2473.85, "total_tokens": 7146608}
|
| 4262 |
+
{"current_steps": 21230, "total_steps": 25880, "loss": 4.7429, "lr": 4.759918480397585e-06, "epoch": 16.4064914992272, "percentage": 82.03, "elapsed_time": "0:48:09", "remaining_time": "0:10:32", "throughput": 2473.8, "total_tokens": 7147984}
|
| 4263 |
+
{"current_steps": 21235, "total_steps": 25880, "loss": 4.4656, "lr": 4.750026745285863e-06, "epoch": 16.410355486862443, "percentage": 82.05, "elapsed_time": "0:48:10", "remaining_time": "0:10:32", "throughput": 2473.8, "total_tokens": 7149584}
|
| 4264 |
+
{"current_steps": 21240, "total_steps": 25880, "loss": 4.6543, "lr": 4.740144219983247e-06, "epoch": 16.41421947449768, "percentage": 82.07, "elapsed_time": "0:48:10", "remaining_time": "0:10:31", "throughput": 2473.82, "total_tokens": 7151280}
|
| 4265 |
+
{"current_steps": 21245, "total_steps": 25880, "loss": 4.7988, "lr": 4.7302709089843744e-06, "epoch": 16.41808346213292, "percentage": 82.09, "elapsed_time": "0:48:11", "remaining_time": "0:10:30", "throughput": 2473.86, "total_tokens": 7152912}
|
| 4266 |
+
{"current_steps": 21250, "total_steps": 25880, "loss": 4.6622, "lr": 4.720406816779679e-06, "epoch": 16.42194744976816, "percentage": 82.11, "elapsed_time": "0:48:12", "remaining_time": "0:10:30", "throughput": 2473.81, "total_tokens": 7154256}
|
| 4267 |
+
{"current_steps": 21255, "total_steps": 25880, "loss": 4.6793, "lr": 4.71055194785541e-06, "epoch": 16.4258114374034, "percentage": 82.13, "elapsed_time": "0:48:12", "remaining_time": "0:10:29", "throughput": 2473.84, "total_tokens": 7155888}
|
| 4268 |
+
{"current_steps": 21260, "total_steps": 25880, "loss": 4.8071, "lr": 4.700706306693628e-06, "epoch": 16.42967542503864, "percentage": 82.15, "elapsed_time": "0:48:13", "remaining_time": "0:10:28", "throughput": 2473.82, "total_tokens": 7157424}
|
| 4269 |
+
{"current_steps": 21265, "total_steps": 25880, "loss": 4.6154, "lr": 4.69086989777218e-06, "epoch": 16.43353941267388, "percentage": 82.17, "elapsed_time": "0:48:13", "remaining_time": "0:10:28", "throughput": 2473.86, "total_tokens": 7159152}
|
| 4270 |
+
{"current_steps": 21270, "total_steps": 25880, "loss": 4.5376, "lr": 4.681042725564735e-06, "epoch": 16.43740340030912, "percentage": 82.19, "elapsed_time": "0:48:14", "remaining_time": "0:10:27", "throughput": 2473.92, "total_tokens": 7160976}
|
| 4271 |
+
{"current_steps": 21275, "total_steps": 25880, "loss": 4.737, "lr": 4.671224794540746e-06, "epoch": 16.441267387944357, "percentage": 82.21, "elapsed_time": "0:48:15", "remaining_time": "0:10:26", "throughput": 2474.01, "total_tokens": 7163120}
|
| 4272 |
+
{"current_steps": 21280, "total_steps": 25880, "loss": 4.6298, "lr": 4.661416109165462e-06, "epoch": 16.4451313755796, "percentage": 82.23, "elapsed_time": "0:48:15", "remaining_time": "0:10:26", "throughput": 2474.05, "total_tokens": 7164848}
|
| 4273 |
+
{"current_steps": 21285, "total_steps": 25880, "loss": 4.4877, "lr": 4.651616673899936e-06, "epoch": 16.448995363214838, "percentage": 82.24, "elapsed_time": "0:48:16", "remaining_time": "0:10:25", "throughput": 2474.09, "total_tokens": 7166576}
|
| 4274 |
+
{"current_steps": 21290, "total_steps": 25880, "loss": 4.3857, "lr": 4.641826493201007e-06, "epoch": 16.452859350850076, "percentage": 82.26, "elapsed_time": "0:48:17", "remaining_time": "0:10:24", "throughput": 2474.06, "total_tokens": 7168112}
|
| 4275 |
+
{"current_steps": 21295, "total_steps": 25880, "loss": 4.9788, "lr": 4.632045571521304e-06, "epoch": 16.456723338485318, "percentage": 82.28, "elapsed_time": "0:48:17", "remaining_time": "0:10:23", "throughput": 2474.08, "total_tokens": 7169680}
|
| 4276 |
+
{"current_steps": 21300, "total_steps": 25880, "loss": 4.2424, "lr": 4.6222739133092605e-06, "epoch": 16.460587326120557, "percentage": 82.3, "elapsed_time": "0:48:18", "remaining_time": "0:10:23", "throughput": 2474.08, "total_tokens": 7171312}
|
| 4277 |
+
{"current_steps": 21305, "total_steps": 25880, "loss": 4.606, "lr": 4.6125115230090724e-06, "epoch": 16.464451313755795, "percentage": 82.32, "elapsed_time": "0:48:19", "remaining_time": "0:10:22", "throughput": 2474.08, "total_tokens": 7172912}
|
| 4278 |
+
{"current_steps": 21310, "total_steps": 25880, "loss": 4.7768, "lr": 4.602758405060745e-06, "epoch": 16.468315301391037, "percentage": 82.34, "elapsed_time": "0:48:19", "remaining_time": "0:10:21", "throughput": 2474.11, "total_tokens": 7174608}
|
| 4279 |
+
{"current_steps": 21315, "total_steps": 25880, "loss": 4.6478, "lr": 4.59301456390005e-06, "epoch": 16.472179289026275, "percentage": 82.36, "elapsed_time": "0:48:20", "remaining_time": "0:10:21", "throughput": 2474.09, "total_tokens": 7176176}
|
| 4280 |
+
{"current_steps": 21320, "total_steps": 25880, "loss": 4.5273, "lr": 4.583280003958546e-06, "epoch": 16.476043276661514, "percentage": 82.38, "elapsed_time": "0:48:21", "remaining_time": "0:10:20", "throughput": 2474.12, "total_tokens": 7177872}
|
| 4281 |
+
{"current_steps": 21325, "total_steps": 25880, "loss": 4.3769, "lr": 4.573554729663562e-06, "epoch": 16.479907264296756, "percentage": 82.4, "elapsed_time": "0:48:21", "remaining_time": "0:10:19", "throughput": 2474.13, "total_tokens": 7179600}
|
| 4282 |
+
{"current_steps": 21330, "total_steps": 25880, "loss": 5.1797, "lr": 4.563838745438215e-06, "epoch": 16.483771251931994, "percentage": 82.42, "elapsed_time": "0:48:22", "remaining_time": "0:10:19", "throughput": 2474.16, "total_tokens": 7181232}
|
| 4283 |
+
{"current_steps": 21335, "total_steps": 25880, "loss": 4.4628, "lr": 4.554132055701396e-06, "epoch": 16.487635239567233, "percentage": 82.44, "elapsed_time": "0:48:23", "remaining_time": "0:10:18", "throughput": 2474.18, "total_tokens": 7182896}
|
| 4284 |
+
{"current_steps": 21340, "total_steps": 25880, "loss": 4.8679, "lr": 4.544434664867761e-06, "epoch": 16.491499227202475, "percentage": 82.46, "elapsed_time": "0:48:23", "remaining_time": "0:10:17", "throughput": 2474.21, "total_tokens": 7184496}
|
| 4285 |
+
{"current_steps": 21345, "total_steps": 25880, "loss": 4.7874, "lr": 4.534746577347748e-06, "epoch": 16.495363214837713, "percentage": 82.48, "elapsed_time": "0:48:24", "remaining_time": "0:10:17", "throughput": 2474.21, "total_tokens": 7186032}
|
| 4286 |
+
{"current_steps": 21350, "total_steps": 25880, "loss": 4.6404, "lr": 4.525067797547553e-06, "epoch": 16.49922720247295, "percentage": 82.5, "elapsed_time": "0:48:25", "remaining_time": "0:10:16", "throughput": 2474.23, "total_tokens": 7187760}
|
| 4287 |
+
{"current_steps": 21355, "total_steps": 25880, "loss": 4.5147, "lr": 4.515398329869144e-06, "epoch": 16.50309119010819, "percentage": 82.52, "elapsed_time": "0:48:25", "remaining_time": "0:10:15", "throughput": 2474.27, "total_tokens": 7189520}
|
| 4288 |
+
{"current_steps": 21360, "total_steps": 25880, "loss": 4.5736, "lr": 4.505738178710253e-06, "epoch": 16.506955177743432, "percentage": 82.53, "elapsed_time": "0:48:26", "remaining_time": "0:10:15", "throughput": 2474.27, "total_tokens": 7191120}
|
| 4289 |
+
{"current_steps": 21365, "total_steps": 25880, "loss": 4.8491, "lr": 4.496087348464365e-06, "epoch": 16.51081916537867, "percentage": 82.55, "elapsed_time": "0:48:27", "remaining_time": "0:10:14", "throughput": 2474.3, "total_tokens": 7192816}
|
| 4290 |
+
{"current_steps": 21370, "total_steps": 25880, "loss": 4.7809, "lr": 4.486445843520751e-06, "epoch": 16.51468315301391, "percentage": 82.57, "elapsed_time": "0:48:27", "remaining_time": "0:10:13", "throughput": 2474.31, "total_tokens": 7194480}
|
| 4291 |
+
{"current_steps": 21375, "total_steps": 25880, "loss": 4.7266, "lr": 4.4768136682644124e-06, "epoch": 16.51854714064915, "percentage": 82.59, "elapsed_time": "0:48:28", "remaining_time": "0:10:12", "throughput": 2474.34, "total_tokens": 7196336}
|
| 4292 |
+
{"current_steps": 21380, "total_steps": 25880, "loss": 4.6276, "lr": 4.467190827076134e-06, "epoch": 16.52241112828439, "percentage": 82.61, "elapsed_time": "0:48:29", "remaining_time": "0:10:12", "throughput": 2474.33, "total_tokens": 7197840}
|
| 4293 |
+
{"current_steps": 21385, "total_steps": 25880, "loss": 4.7909, "lr": 4.457577324332432e-06, "epoch": 16.526275115919628, "percentage": 82.63, "elapsed_time": "0:48:29", "remaining_time": "0:10:11", "throughput": 2474.31, "total_tokens": 7199376}
|
| 4294 |
+
{"current_steps": 21390, "total_steps": 25880, "loss": 5.0165, "lr": 4.447973164405586e-06, "epoch": 16.53013910355487, "percentage": 82.65, "elapsed_time": "0:48:30", "remaining_time": "0:10:10", "throughput": 2474.3, "total_tokens": 7200976}
|
| 4295 |
+
{"current_steps": 21395, "total_steps": 25880, "loss": 5.0042, "lr": 4.438378351663627e-06, "epoch": 16.534003091190108, "percentage": 82.67, "elapsed_time": "0:48:30", "remaining_time": "0:10:10", "throughput": 2474.3, "total_tokens": 7202576}
|
| 4296 |
+
{"current_steps": 21400, "total_steps": 25880, "loss": 4.5754, "lr": 4.428792890470332e-06, "epoch": 16.537867078825347, "percentage": 82.69, "elapsed_time": "0:48:31", "remaining_time": "0:10:09", "throughput": 2474.3, "total_tokens": 7204112}
|
| 4297 |
+
{"current_steps": 21405, "total_steps": 25880, "loss": 4.546, "lr": 4.419216785185221e-06, "epoch": 16.54173106646059, "percentage": 82.71, "elapsed_time": "0:48:32", "remaining_time": "0:10:08", "throughput": 2474.3, "total_tokens": 7205648}
|
| 4298 |
+
{"current_steps": 21410, "total_steps": 25880, "loss": 4.5285, "lr": 4.4096500401635734e-06, "epoch": 16.545595054095827, "percentage": 82.73, "elapsed_time": "0:48:32", "remaining_time": "0:10:08", "throughput": 2474.32, "total_tokens": 7207216}
|
| 4299 |
+
{"current_steps": 21415, "total_steps": 25880, "loss": 4.9614, "lr": 4.400092659756397e-06, "epoch": 16.549459041731065, "percentage": 82.75, "elapsed_time": "0:48:33", "remaining_time": "0:10:07", "throughput": 2474.36, "total_tokens": 7208880}
|
| 4300 |
+
{"current_steps": 21420, "total_steps": 25880, "loss": 4.7277, "lr": 4.390544648310449e-06, "epoch": 16.553323029366307, "percentage": 82.77, "elapsed_time": "0:48:34", "remaining_time": "0:10:06", "throughput": 2474.39, "total_tokens": 7210480}
|
| 4301 |
+
{"current_steps": 21425, "total_steps": 25880, "loss": 4.9173, "lr": 4.38100601016822e-06, "epoch": 16.557187017001546, "percentage": 82.79, "elapsed_time": "0:48:34", "remaining_time": "0:10:06", "throughput": 2474.45, "total_tokens": 7212304}
|
| 4302 |
+
{"current_steps": 21430, "total_steps": 25880, "loss": 4.5453, "lr": 4.371476749667941e-06, "epoch": 16.561051004636784, "percentage": 82.81, "elapsed_time": "0:48:35", "remaining_time": "0:10:05", "throughput": 2474.42, "total_tokens": 7213712}
|
| 4303 |
+
{"current_steps": 21435, "total_steps": 25880, "loss": 4.5878, "lr": 4.361956871143577e-06, "epoch": 16.564914992272026, "percentage": 82.82, "elapsed_time": "0:48:35", "remaining_time": "0:10:04", "throughput": 2474.47, "total_tokens": 7215408}
|
| 4304 |
+
{"current_steps": 21440, "total_steps": 25880, "loss": 4.7126, "lr": 4.352446378924818e-06, "epoch": 16.568778979907265, "percentage": 82.84, "elapsed_time": "0:48:36", "remaining_time": "0:10:03", "throughput": 2474.47, "total_tokens": 7216912}
|
| 4305 |
+
{"current_steps": 21445, "total_steps": 25880, "loss": 4.8592, "lr": 4.342945277337104e-06, "epoch": 16.572642967542503, "percentage": 82.86, "elapsed_time": "0:48:37", "remaining_time": "0:10:03", "throughput": 2474.52, "total_tokens": 7218832}
|
| 4306 |
+
{"current_steps": 21450, "total_steps": 25880, "loss": 4.4821, "lr": 4.333453570701587e-06, "epoch": 16.576506955177745, "percentage": 82.88, "elapsed_time": "0:48:37", "remaining_time": "0:10:02", "throughput": 2474.51, "total_tokens": 7220304}
|
| 4307 |
+
{"current_steps": 21455, "total_steps": 25880, "loss": 4.2234, "lr": 4.32397126333515e-06, "epoch": 16.580370942812984, "percentage": 82.9, "elapsed_time": "0:48:38", "remaining_time": "0:10:01", "throughput": 2474.51, "total_tokens": 7221840}
|
| 4308 |
+
{"current_steps": 21460, "total_steps": 25880, "loss": 4.4964, "lr": 4.314498359550412e-06, "epoch": 16.584234930448222, "percentage": 82.92, "elapsed_time": "0:48:39", "remaining_time": "0:10:01", "throughput": 2474.53, "total_tokens": 7223408}
|
| 4309 |
+
{"current_steps": 21465, "total_steps": 25880, "loss": 4.5411, "lr": 4.3050348636556994e-06, "epoch": 16.58809891808346, "percentage": 82.94, "elapsed_time": "0:48:39", "remaining_time": "0:10:00", "throughput": 2474.61, "total_tokens": 7225520}
|
| 4310 |
+
{"current_steps": 21470, "total_steps": 25880, "loss": 4.5779, "lr": 4.295580779955066e-06, "epoch": 16.591962905718702, "percentage": 82.96, "elapsed_time": "0:48:40", "remaining_time": "0:09:59", "throughput": 2474.69, "total_tokens": 7227504}
|
| 4311 |
+
{"current_steps": 21475, "total_steps": 25880, "loss": 4.662, "lr": 4.286136112748285e-06, "epoch": 16.59582689335394, "percentage": 82.98, "elapsed_time": "0:48:41", "remaining_time": "0:09:59", "throughput": 2474.73, "total_tokens": 7229232}
|
| 4312 |
+
{"current_steps": 21480, "total_steps": 25880, "loss": 5.0494, "lr": 4.276700866330854e-06, "epoch": 16.59969088098918, "percentage": 83.0, "elapsed_time": "0:48:41", "remaining_time": "0:09:58", "throughput": 2474.73, "total_tokens": 7230768}
|
| 4313 |
+
{"current_steps": 21485, "total_steps": 25880, "loss": 4.7292, "lr": 4.267275044993979e-06, "epoch": 16.60355486862442, "percentage": 83.02, "elapsed_time": "0:48:42", "remaining_time": "0:09:57", "throughput": 2474.79, "total_tokens": 7232656}
|
| 4314 |
+
{"current_steps": 21490, "total_steps": 25880, "loss": 5.1795, "lr": 4.257858653024577e-06, "epoch": 16.60741885625966, "percentage": 83.04, "elapsed_time": "0:48:43", "remaining_time": "0:09:57", "throughput": 2474.82, "total_tokens": 7234384}
|
| 4315 |
+
{"current_steps": 21495, "total_steps": 25880, "loss": 4.513, "lr": 4.248451694705271e-06, "epoch": 16.611282843894898, "percentage": 83.06, "elapsed_time": "0:48:43", "remaining_time": "0:09:56", "throughput": 2474.84, "total_tokens": 7235984}
|
| 4316 |
+
{"current_steps": 21500, "total_steps": 25880, "loss": 4.8118, "lr": 4.239054174314417e-06, "epoch": 16.61514683153014, "percentage": 83.08, "elapsed_time": "0:48:44", "remaining_time": "0:09:55", "throughput": 2474.84, "total_tokens": 7237680}
|
| 4317 |
+
{"current_steps": 21505, "total_steps": 25880, "loss": 4.7754, "lr": 4.229666096126056e-06, "epoch": 16.61901081916538, "percentage": 83.1, "elapsed_time": "0:48:45", "remaining_time": "0:09:55", "throughput": 2474.9, "total_tokens": 7239568}
|
| 4318 |
+
{"current_steps": 21510, "total_steps": 25880, "loss": 4.9904, "lr": 4.220287464409939e-06, "epoch": 16.622874806800617, "percentage": 83.11, "elapsed_time": "0:48:45", "remaining_time": "0:09:54", "throughput": 2474.92, "total_tokens": 7241264}
|
| 4319 |
+
{"current_steps": 21515, "total_steps": 25880, "loss": 4.618, "lr": 4.210918283431534e-06, "epoch": 16.62673879443586, "percentage": 83.13, "elapsed_time": "0:48:46", "remaining_time": "0:09:53", "throughput": 2475.01, "total_tokens": 7243056}
|
| 4320 |
+
{"current_steps": 21520, "total_steps": 25880, "loss": 4.3202, "lr": 4.201558557451993e-06, "epoch": 16.630602782071097, "percentage": 83.15, "elapsed_time": "0:48:47", "remaining_time": "0:09:53", "throughput": 2475.01, "total_tokens": 7244592}
|
| 4321 |
+
{"current_steps": 21525, "total_steps": 25880, "loss": 4.8453, "lr": 4.192208290728178e-06, "epoch": 16.634466769706336, "percentage": 83.17, "elapsed_time": "0:48:47", "remaining_time": "0:09:52", "throughput": 2475.05, "total_tokens": 7246256}
|
| 4322 |
+
{"current_steps": 21530, "total_steps": 25880, "loss": 4.5988, "lr": 4.182867487512645e-06, "epoch": 16.638330757341578, "percentage": 83.19, "elapsed_time": "0:48:48", "remaining_time": "0:09:51", "throughput": 2475.07, "total_tokens": 7248016}
|
| 4323 |
+
{"current_steps": 21535, "total_steps": 25880, "loss": 4.6423, "lr": 4.173536152053642e-06, "epoch": 16.642194744976816, "percentage": 83.21, "elapsed_time": "0:48:49", "remaining_time": "0:09:50", "throughput": 2475.12, "total_tokens": 7249680}
|
| 4324 |
+
{"current_steps": 21540, "total_steps": 25880, "loss": 4.5177, "lr": 4.164214288595128e-06, "epoch": 16.646058732612055, "percentage": 83.23, "elapsed_time": "0:48:49", "remaining_time": "0:09:50", "throughput": 2475.1, "total_tokens": 7251248}
|
| 4325 |
+
{"current_steps": 21545, "total_steps": 25880, "loss": 4.6362, "lr": 4.154901901376729e-06, "epoch": 16.649922720247297, "percentage": 83.25, "elapsed_time": "0:48:50", "remaining_time": "0:09:49", "throughput": 2475.17, "total_tokens": 7253072}
|
| 4326 |
+
{"current_steps": 21550, "total_steps": 25880, "loss": 4.8589, "lr": 4.14559899463379e-06, "epoch": 16.653786707882535, "percentage": 83.27, "elapsed_time": "0:48:50", "remaining_time": "0:09:48", "throughput": 2475.15, "total_tokens": 7254640}
|
| 4327 |
+
{"current_steps": 21555, "total_steps": 25880, "loss": 4.891, "lr": 4.136305572597318e-06, "epoch": 16.657650695517773, "percentage": 83.29, "elapsed_time": "0:48:51", "remaining_time": "0:09:48", "throughput": 2475.22, "total_tokens": 7256464}
|
| 4328 |
+
{"current_steps": 21560, "total_steps": 25880, "loss": 4.6982, "lr": 4.127021639494022e-06, "epoch": 16.661514683153015, "percentage": 83.31, "elapsed_time": "0:48:52", "remaining_time": "0:09:47", "throughput": 2475.28, "total_tokens": 7258192}
|
| 4329 |
+
{"current_steps": 21565, "total_steps": 25880, "loss": 4.6829, "lr": 4.117747199546285e-06, "epoch": 16.665378670788254, "percentage": 83.33, "elapsed_time": "0:48:52", "remaining_time": "0:09:46", "throughput": 2475.25, "total_tokens": 7259728}
|
| 4330 |
+
{"current_steps": 21570, "total_steps": 25880, "loss": 4.8203, "lr": 4.108482256972182e-06, "epoch": 16.669242658423492, "percentage": 83.35, "elapsed_time": "0:48:53", "remaining_time": "0:09:46", "throughput": 2475.33, "total_tokens": 7261680}
|
| 4331 |
+
{"current_steps": 21575, "total_steps": 25880, "loss": 5.007, "lr": 4.099226815985458e-06, "epoch": 16.673106646058734, "percentage": 83.37, "elapsed_time": "0:48:54", "remaining_time": "0:09:45", "throughput": 2475.37, "total_tokens": 7263440}
|
| 4332 |
+
{"current_steps": 21580, "total_steps": 25880, "loss": 4.5126, "lr": 4.089980880795543e-06, "epoch": 16.676970633693973, "percentage": 83.38, "elapsed_time": "0:48:54", "remaining_time": "0:09:44", "throughput": 2475.38, "total_tokens": 7264976}
|
| 4333 |
+
{"current_steps": 21585, "total_steps": 25880, "loss": 4.7609, "lr": 4.08074445560756e-06, "epoch": 16.68083462132921, "percentage": 83.4, "elapsed_time": "0:48:55", "remaining_time": "0:09:44", "throughput": 2475.41, "total_tokens": 7266608}
|
| 4334 |
+
{"current_steps": 21590, "total_steps": 25880, "loss": 4.5621, "lr": 4.071517544622278e-06, "epoch": 16.684698608964453, "percentage": 83.42, "elapsed_time": "0:48:56", "remaining_time": "0:09:43", "throughput": 2475.43, "total_tokens": 7268304}
|
| 4335 |
+
{"current_steps": 21595, "total_steps": 25880, "loss": 4.4443, "lr": 4.0623001520361494e-06, "epoch": 16.68856259659969, "percentage": 83.44, "elapsed_time": "0:48:56", "remaining_time": "0:09:42", "throughput": 2475.44, "total_tokens": 7270128}
|
| 4336 |
+
{"current_steps": 21600, "total_steps": 25880, "loss": 4.6808, "lr": 4.053092282041307e-06, "epoch": 16.69242658423493, "percentage": 83.46, "elapsed_time": "0:48:57", "remaining_time": "0:09:42", "throughput": 2475.49, "total_tokens": 7271888}
|
| 4337 |
+
{"current_steps": 21605, "total_steps": 25880, "loss": 4.6146, "lr": 4.043893938825538e-06, "epoch": 16.69629057187017, "percentage": 83.48, "elapsed_time": "0:48:58", "remaining_time": "0:09:41", "throughput": 2475.47, "total_tokens": 7273456}
|
| 4338 |
+
{"current_steps": 21610, "total_steps": 25880, "loss": 4.2665, "lr": 4.034705126572299e-06, "epoch": 16.70015455950541, "percentage": 83.5, "elapsed_time": "0:48:58", "remaining_time": "0:09:40", "throughput": 2475.49, "total_tokens": 7275120}
|
| 4339 |
+
{"current_steps": 21615, "total_steps": 25880, "loss": 4.6885, "lr": 4.025525849460729e-06, "epoch": 16.70401854714065, "percentage": 83.52, "elapsed_time": "0:48:59", "remaining_time": "0:09:40", "throughput": 2475.53, "total_tokens": 7277040}
|
| 4340 |
+
{"current_steps": 21620, "total_steps": 25880, "loss": 4.6715, "lr": 4.016356111665617e-06, "epoch": 16.707882534775887, "percentage": 83.54, "elapsed_time": "0:49:00", "remaining_time": "0:09:39", "throughput": 2475.59, "total_tokens": 7278768}
|
| 4341 |
+
{"current_steps": 21625, "total_steps": 25880, "loss": 4.4967, "lr": 4.007195917357412e-06, "epoch": 16.71174652241113, "percentage": 83.56, "elapsed_time": "0:49:00", "remaining_time": "0:09:38", "throughput": 2475.56, "total_tokens": 7280176}
|
| 4342 |
+
{"current_steps": 21630, "total_steps": 25880, "loss": 5.0394, "lr": 3.998045270702227e-06, "epoch": 16.715610510046368, "percentage": 83.58, "elapsed_time": "0:49:01", "remaining_time": "0:09:37", "throughput": 2475.57, "total_tokens": 7281744}
|
| 4343 |
+
{"current_steps": 21635, "total_steps": 25880, "loss": 4.5084, "lr": 3.988904175861827e-06, "epoch": 16.719474497681606, "percentage": 83.6, "elapsed_time": "0:49:02", "remaining_time": "0:09:37", "throughput": 2475.56, "total_tokens": 7283248}
|
| 4344 |
+
{"current_steps": 21640, "total_steps": 25880, "loss": 4.9877, "lr": 3.979772636993636e-06, "epoch": 16.723338485316848, "percentage": 83.62, "elapsed_time": "0:49:02", "remaining_time": "0:09:36", "throughput": 2475.61, "total_tokens": 7285008}
|
| 4345 |
+
{"current_steps": 21645, "total_steps": 25880, "loss": 4.7062, "lr": 3.970650658250732e-06, "epoch": 16.727202472952087, "percentage": 83.64, "elapsed_time": "0:49:03", "remaining_time": "0:09:35", "throughput": 2475.66, "total_tokens": 7286800}
|
| 4346 |
+
{"current_steps": 21650, "total_steps": 25880, "loss": 5.011, "lr": 3.961538243781854e-06, "epoch": 16.731066460587325, "percentage": 83.66, "elapsed_time": "0:49:04", "remaining_time": "0:09:35", "throughput": 2475.69, "total_tokens": 7288592}
|
| 4347 |
+
{"current_steps": 21655, "total_steps": 25880, "loss": 4.7908, "lr": 3.9524353977313715e-06, "epoch": 16.734930448222567, "percentage": 83.67, "elapsed_time": "0:49:04", "remaining_time": "0:09:34", "throughput": 2475.72, "total_tokens": 7290288}
|
| 4348 |
+
{"current_steps": 21660, "total_steps": 25880, "loss": 4.4118, "lr": 3.943342124239324e-06, "epoch": 16.738794435857805, "percentage": 83.69, "elapsed_time": "0:49:05", "remaining_time": "0:09:33", "throughput": 2475.7, "total_tokens": 7291760}
|
| 4349 |
+
{"current_steps": 21665, "total_steps": 25880, "loss": 4.9019, "lr": 3.934258427441381e-06, "epoch": 16.742658423493044, "percentage": 83.71, "elapsed_time": "0:49:05", "remaining_time": "0:09:33", "throughput": 2475.71, "total_tokens": 7293392}
|
| 4350 |
+
{"current_steps": 21670, "total_steps": 25880, "loss": 4.7284, "lr": 3.925184311468865e-06, "epoch": 16.746522411128286, "percentage": 83.73, "elapsed_time": "0:49:06", "remaining_time": "0:09:32", "throughput": 2475.68, "total_tokens": 7294832}
|
| 4351 |
+
{"current_steps": 21675, "total_steps": 25880, "loss": 4.8024, "lr": 3.916119780448735e-06, "epoch": 16.750386398763524, "percentage": 83.75, "elapsed_time": "0:49:07", "remaining_time": "0:09:31", "throughput": 2475.69, "total_tokens": 7296368}
|
| 4352 |
+
{"current_steps": 21680, "total_steps": 25880, "loss": 4.486, "lr": 3.907064838503591e-06, "epoch": 16.754250386398763, "percentage": 83.77, "elapsed_time": "0:49:07", "remaining_time": "0:09:31", "throughput": 2475.72, "total_tokens": 7297968}
|
| 4353 |
+
{"current_steps": 21685, "total_steps": 25880, "loss": 4.8855, "lr": 3.898019489751684e-06, "epoch": 16.758114374034005, "percentage": 83.79, "elapsed_time": "0:49:08", "remaining_time": "0:09:30", "throughput": 2475.78, "total_tokens": 7299696}
|
| 4354 |
+
{"current_steps": 21690, "total_steps": 25880, "loss": 4.4605, "lr": 3.8889837383068864e-06, "epoch": 16.761978361669243, "percentage": 83.81, "elapsed_time": "0:49:09", "remaining_time": "0:09:29", "throughput": 2475.83, "total_tokens": 7301552}
|
| 4355 |
+
{"current_steps": 21695, "total_steps": 25880, "loss": 4.6538, "lr": 3.879957588278707e-06, "epoch": 16.76584234930448, "percentage": 83.83, "elapsed_time": "0:49:09", "remaining_time": "0:09:29", "throughput": 2475.86, "total_tokens": 7303376}
|
| 4356 |
+
{"current_steps": 21700, "total_steps": 25880, "loss": 4.504, "lr": 3.870941043772308e-06, "epoch": 16.769706336939723, "percentage": 83.85, "elapsed_time": "0:49:10", "remaining_time": "0:09:28", "throughput": 2475.89, "total_tokens": 7304976}
|
| 4357 |
+
{"current_steps": 21705, "total_steps": 25880, "loss": 4.8444, "lr": 3.8619341088884595e-06, "epoch": 16.773570324574962, "percentage": 83.87, "elapsed_time": "0:49:11", "remaining_time": "0:09:27", "throughput": 2475.89, "total_tokens": 7306608}
|
| 4358 |
+
{"current_steps": 21710, "total_steps": 25880, "loss": 4.703, "lr": 3.852936787723568e-06, "epoch": 16.7774343122102, "percentage": 83.89, "elapsed_time": "0:49:11", "remaining_time": "0:09:26", "throughput": 2475.95, "total_tokens": 7308304}
|
| 4359 |
+
{"current_steps": 21715, "total_steps": 25880, "loss": 4.7625, "lr": 3.843949084369663e-06, "epoch": 16.78129829984544, "percentage": 83.91, "elapsed_time": "0:49:12", "remaining_time": "0:09:26", "throughput": 2475.95, "total_tokens": 7309936}
|
| 4360 |
+
{"current_steps": 21720, "total_steps": 25880, "loss": 4.3034, "lr": 3.83497100291442e-06, "epoch": 16.78516228748068, "percentage": 83.93, "elapsed_time": "0:49:12", "remaining_time": "0:09:25", "throughput": 2475.96, "total_tokens": 7311472}
|
| 4361 |
+
{"current_steps": 21725, "total_steps": 25880, "loss": 4.6865, "lr": 3.826002547441118e-06, "epoch": 16.78902627511592, "percentage": 83.95, "elapsed_time": "0:49:13", "remaining_time": "0:09:24", "throughput": 2475.98, "total_tokens": 7313168}
|
| 4362 |
+
{"current_steps": 21730, "total_steps": 25880, "loss": 4.9441, "lr": 3.817043722028663e-06, "epoch": 16.792890262751158, "percentage": 83.96, "elapsed_time": "0:49:14", "remaining_time": "0:09:24", "throughput": 2475.94, "total_tokens": 7314640}
|
| 4363 |
+
{"current_steps": 21735, "total_steps": 25880, "loss": 5.1309, "lr": 3.808094530751577e-06, "epoch": 16.7967542503864, "percentage": 83.98, "elapsed_time": "0:49:14", "remaining_time": "0:09:23", "throughput": 2475.95, "total_tokens": 7316208}
|
| 4364 |
+
{"current_steps": 21740, "total_steps": 25880, "loss": 4.5417, "lr": 3.7991549776800197e-06, "epoch": 16.800618238021638, "percentage": 84.0, "elapsed_time": "0:49:15", "remaining_time": "0:09:22", "throughput": 2475.94, "total_tokens": 7317872}
|
| 4365 |
+
{"current_steps": 21745, "total_steps": 25880, "loss": 4.5472, "lr": 3.7902250668797435e-06, "epoch": 16.804482225656876, "percentage": 84.02, "elapsed_time": "0:49:16", "remaining_time": "0:09:22", "throughput": 2476.02, "total_tokens": 7319728}
|
| 4366 |
+
{"current_steps": 21750, "total_steps": 25880, "loss": 4.7375, "lr": 3.7813048024121196e-06, "epoch": 16.80834621329212, "percentage": 84.04, "elapsed_time": "0:49:16", "remaining_time": "0:09:21", "throughput": 2476.05, "total_tokens": 7321456}
|
| 4367 |
+
{"current_steps": 21755, "total_steps": 25880, "loss": 4.5176, "lr": 3.7723941883341526e-06, "epoch": 16.812210200927357, "percentage": 84.06, "elapsed_time": "0:49:17", "remaining_time": "0:09:20", "throughput": 2476.05, "total_tokens": 7323056}
|
| 4368 |
+
{"current_steps": 21760, "total_steps": 25880, "loss": 4.4373, "lr": 3.7634932286984363e-06, "epoch": 16.816074188562595, "percentage": 84.08, "elapsed_time": "0:49:18", "remaining_time": "0:09:20", "throughput": 2476.04, "total_tokens": 7324560}
|
| 4369 |
+
{"current_steps": 21765, "total_steps": 25880, "loss": 4.5598, "lr": 3.7546019275531806e-06, "epoch": 16.819938176197837, "percentage": 84.1, "elapsed_time": "0:49:18", "remaining_time": "0:09:19", "throughput": 2476.04, "total_tokens": 7326096}
|
| 4370 |
+
{"current_steps": 21770, "total_steps": 25880, "loss": 4.7452, "lr": 3.7457202889422004e-06, "epoch": 16.823802163833076, "percentage": 84.12, "elapsed_time": "0:49:19", "remaining_time": "0:09:18", "throughput": 2476.06, "total_tokens": 7327664}
|
| 4371 |
+
{"current_steps": 21775, "total_steps": 25880, "loss": 4.3841, "lr": 3.736848316904923e-06, "epoch": 16.827666151468314, "percentage": 84.14, "elapsed_time": "0:49:20", "remaining_time": "0:09:18", "throughput": 2476.08, "total_tokens": 7329456}
|
| 4372 |
+
{"current_steps": 21780, "total_steps": 25880, "loss": 4.7309, "lr": 3.727986015476362e-06, "epoch": 16.831530139103556, "percentage": 84.16, "elapsed_time": "0:49:20", "remaining_time": "0:09:17", "throughput": 2476.1, "total_tokens": 7331024}
|
| 4373 |
+
{"current_steps": 21785, "total_steps": 25880, "loss": 4.8856, "lr": 3.7191333886871543e-06, "epoch": 16.835394126738795, "percentage": 84.18, "elapsed_time": "0:49:21", "remaining_time": "0:09:16", "throughput": 2476.13, "total_tokens": 7332752}
|
| 4374 |
+
{"current_steps": 21790, "total_steps": 25880, "loss": 5.0622, "lr": 3.710290440563535e-06, "epoch": 16.839258114374033, "percentage": 84.2, "elapsed_time": "0:49:22", "remaining_time": "0:09:15", "throughput": 2476.2, "total_tokens": 7334576}
|
| 4375 |
+
{"current_steps": 21795, "total_steps": 25880, "loss": 4.7717, "lr": 3.7014571751273207e-06, "epoch": 16.843122102009275, "percentage": 84.22, "elapsed_time": "0:49:22", "remaining_time": "0:09:15", "throughput": 2476.23, "total_tokens": 7336464}
|
| 4376 |
+
{"current_steps": 21800, "total_steps": 25880, "loss": 4.8668, "lr": 3.692633596395936e-06, "epoch": 16.846986089644513, "percentage": 84.23, "elapsed_time": "0:49:23", "remaining_time": "0:09:14", "throughput": 2476.2, "total_tokens": 7338000}
|
| 4377 |
+
{"current_steps": 21805, "total_steps": 25880, "loss": 4.8346, "lr": 3.6838197083823965e-06, "epoch": 16.850850077279752, "percentage": 84.25, "elapsed_time": "0:49:24", "remaining_time": "0:09:13", "throughput": 2476.29, "total_tokens": 7339888}
|
| 4378 |
+
{"current_steps": 21810, "total_steps": 25880, "loss": 4.7336, "lr": 3.675015515095312e-06, "epoch": 16.854714064914994, "percentage": 84.27, "elapsed_time": "0:49:24", "remaining_time": "0:09:13", "throughput": 2476.29, "total_tokens": 7341520}
|
| 4379 |
+
{"current_steps": 21815, "total_steps": 25880, "loss": 4.4765, "lr": 3.6662210205388766e-06, "epoch": 16.858578052550232, "percentage": 84.29, "elapsed_time": "0:49:25", "remaining_time": "0:09:12", "throughput": 2476.29, "total_tokens": 7343216}
|
| 4380 |
+
{"current_steps": 21820, "total_steps": 25880, "loss": 4.8654, "lr": 3.657436228712882e-06, "epoch": 16.86244204018547, "percentage": 84.31, "elapsed_time": "0:49:26", "remaining_time": "0:09:11", "throughput": 2476.3, "total_tokens": 7344848}
|
| 4381 |
+
{"current_steps": 21825, "total_steps": 25880, "loss": 4.5041, "lr": 3.648661143612711e-06, "epoch": 16.866306027820713, "percentage": 84.33, "elapsed_time": "0:49:26", "remaining_time": "0:09:11", "throughput": 2476.36, "total_tokens": 7346736}
|
| 4382 |
+
{"current_steps": 21830, "total_steps": 25880, "loss": 4.5201, "lr": 3.6398957692293205e-06, "epoch": 16.87017001545595, "percentage": 84.35, "elapsed_time": "0:49:27", "remaining_time": "0:09:10", "throughput": 2476.39, "total_tokens": 7348464}
|
| 4383 |
+
{"current_steps": 21835, "total_steps": 25880, "loss": 4.4328, "lr": 3.631140109549258e-06, "epoch": 16.87403400309119, "percentage": 84.37, "elapsed_time": "0:49:28", "remaining_time": "0:09:09", "throughput": 2476.44, "total_tokens": 7350320}
|
| 4384 |
+
{"current_steps": 21840, "total_steps": 25880, "loss": 4.6651, "lr": 3.622394168554644e-06, "epoch": 16.87789799072643, "percentage": 84.39, "elapsed_time": "0:49:28", "remaining_time": "0:09:09", "throughput": 2476.48, "total_tokens": 7352176}
|
| 4385 |
+
{"current_steps": 21845, "total_steps": 25880, "loss": 4.6537, "lr": 3.613657950223187e-06, "epoch": 16.88176197836167, "percentage": 84.41, "elapsed_time": "0:49:29", "remaining_time": "0:09:08", "throughput": 2476.57, "total_tokens": 7354000}
|
| 4386 |
+
{"current_steps": 21850, "total_steps": 25880, "loss": 4.6641, "lr": 3.6049314585281686e-06, "epoch": 16.88562596599691, "percentage": 84.43, "elapsed_time": "0:49:30", "remaining_time": "0:09:07", "throughput": 2476.62, "total_tokens": 7355760}
|
| 4387 |
+
{"current_steps": 21855, "total_steps": 25880, "loss": 4.5613, "lr": 3.5962146974384575e-06, "epoch": 16.889489953632147, "percentage": 84.45, "elapsed_time": "0:49:30", "remaining_time": "0:09:07", "throughput": 2476.63, "total_tokens": 7357328}
|
| 4388 |
+
{"current_steps": 21860, "total_steps": 25880, "loss": 4.4521, "lr": 3.5875076709184773e-06, "epoch": 16.89335394126739, "percentage": 84.47, "elapsed_time": "0:49:31", "remaining_time": "0:09:06", "throughput": 2476.61, "total_tokens": 7358864}
|
| 4389 |
+
{"current_steps": 21865, "total_steps": 25880, "loss": 4.764, "lr": 3.578810382928249e-06, "epoch": 16.897217928902627, "percentage": 84.49, "elapsed_time": "0:49:32", "remaining_time": "0:09:05", "throughput": 2476.67, "total_tokens": 7360784}
|
| 4390 |
+
{"current_steps": 21870, "total_steps": 25880, "loss": 4.6587, "lr": 3.570122837423348e-06, "epoch": 16.901081916537866, "percentage": 84.51, "elapsed_time": "0:49:32", "remaining_time": "0:09:05", "throughput": 2476.65, "total_tokens": 7362256}
|
| 4391 |
+
{"current_steps": 21875, "total_steps": 25880, "loss": 4.6796, "lr": 3.5614450383549157e-06, "epoch": 16.904945904173108, "percentage": 84.52, "elapsed_time": "0:49:33", "remaining_time": "0:09:04", "throughput": 2476.65, "total_tokens": 7363760}
|
| 4392 |
+
{"current_steps": 21880, "total_steps": 25880, "loss": 4.3427, "lr": 3.5527769896696706e-06, "epoch": 16.908809891808346, "percentage": 84.54, "elapsed_time": "0:49:33", "remaining_time": "0:09:03", "throughput": 2476.71, "total_tokens": 7365648}
|
| 4393 |
+
{"current_steps": 21885, "total_steps": 25880, "loss": 4.659, "lr": 3.5441186953098894e-06, "epoch": 16.912673879443584, "percentage": 84.56, "elapsed_time": "0:49:34", "remaining_time": "0:09:02", "throughput": 2476.68, "total_tokens": 7367088}
|
| 4394 |
+
{"current_steps": 21890, "total_steps": 25880, "loss": 4.6728, "lr": 3.535470159213425e-06, "epoch": 16.916537867078826, "percentage": 84.58, "elapsed_time": "0:49:35", "remaining_time": "0:09:02", "throughput": 2476.74, "total_tokens": 7368880}
|
| 4395 |
+
{"current_steps": 21895, "total_steps": 25880, "loss": 4.8509, "lr": 3.5268313853136754e-06, "epoch": 16.920401854714065, "percentage": 84.6, "elapsed_time": "0:49:35", "remaining_time": "0:09:01", "throughput": 2476.73, "total_tokens": 7370384}
|
| 4396 |
+
{"current_steps": 21900, "total_steps": 25880, "loss": 4.7485, "lr": 3.5182023775396062e-06, "epoch": 16.924265842349303, "percentage": 84.62, "elapsed_time": "0:49:36", "remaining_time": "0:09:00", "throughput": 2476.85, "total_tokens": 7372528}
|
| 4397 |
+
{"current_steps": 21905, "total_steps": 25880, "loss": 4.5651, "lr": 3.50958313981575e-06, "epoch": 16.928129829984545, "percentage": 84.64, "elapsed_time": "0:49:37", "remaining_time": "0:09:00", "throughput": 2476.85, "total_tokens": 7374064}
|
| 4398 |
+
{"current_steps": 21910, "total_steps": 25880, "loss": 4.5629, "lr": 3.500973676062183e-06, "epoch": 16.931993817619784, "percentage": 84.66, "elapsed_time": "0:49:37", "remaining_time": "0:08:59", "throughput": 2476.88, "total_tokens": 7375952}
|
| 4399 |
+
{"current_steps": 21915, "total_steps": 25880, "loss": 4.5627, "lr": 3.492373990194542e-06, "epoch": 16.935857805255022, "percentage": 84.68, "elapsed_time": "0:49:38", "remaining_time": "0:08:58", "throughput": 2476.88, "total_tokens": 7377488}
|
| 4400 |
+
{"current_steps": 21920, "total_steps": 25880, "loss": 4.7683, "lr": 3.483784086124009e-06, "epoch": 16.939721792890264, "percentage": 84.7, "elapsed_time": "0:49:39", "remaining_time": "0:08:58", "throughput": 2476.97, "total_tokens": 7379312}
|
| 4401 |
+
{"current_steps": 21925, "total_steps": 25880, "loss": 4.4385, "lr": 3.4752039677573316e-06, "epoch": 16.943585780525503, "percentage": 84.72, "elapsed_time": "0:49:39", "remaining_time": "0:08:57", "throughput": 2476.97, "total_tokens": 7380848}
|
| 4402 |
+
{"current_steps": 21930, "total_steps": 25880, "loss": 4.6535, "lr": 3.4666336389967996e-06, "epoch": 16.94744976816074, "percentage": 84.74, "elapsed_time": "0:49:40", "remaining_time": "0:08:56", "throughput": 2477.02, "total_tokens": 7382608}
|
| 4403 |
+
{"current_steps": 21935, "total_steps": 25880, "loss": 4.5791, "lr": 3.458073103740245e-06, "epoch": 16.951313755795983, "percentage": 84.76, "elapsed_time": "0:49:41", "remaining_time": "0:08:56", "throughput": 2477.05, "total_tokens": 7384336}
|
| 4404 |
+
{"current_steps": 21940, "total_steps": 25880, "loss": 4.6586, "lr": 3.44952236588105e-06, "epoch": 16.95517774343122, "percentage": 84.78, "elapsed_time": "0:49:41", "remaining_time": "0:08:55", "throughput": 2477.06, "total_tokens": 7385872}
|
| 4405 |
+
{"current_steps": 21945, "total_steps": 25880, "loss": 4.4402, "lr": 3.440981429308146e-06, "epoch": 16.95904173106646, "percentage": 84.8, "elapsed_time": "0:49:42", "remaining_time": "0:08:54", "throughput": 2477.09, "total_tokens": 7387600}
|
| 4406 |
+
{"current_steps": 21950, "total_steps": 25880, "loss": 4.5467, "lr": 3.4324502979060006e-06, "epoch": 16.962905718701702, "percentage": 84.81, "elapsed_time": "0:49:43", "remaining_time": "0:08:54", "throughput": 2477.1, "total_tokens": 7389232}
|
| 4407 |
+
{"current_steps": 21955, "total_steps": 25880, "loss": 4.9285, "lr": 3.423928975554616e-06, "epoch": 16.96676970633694, "percentage": 84.83, "elapsed_time": "0:49:43", "remaining_time": "0:08:53", "throughput": 2477.13, "total_tokens": 7391056}
|
| 4408 |
+
{"current_steps": 21960, "total_steps": 25880, "loss": 4.4999, "lr": 3.415417466129556e-06, "epoch": 16.97063369397218, "percentage": 84.85, "elapsed_time": "0:49:44", "remaining_time": "0:08:52", "throughput": 2477.18, "total_tokens": 7392816}
|
| 4409 |
+
{"current_steps": 21965, "total_steps": 25880, "loss": 4.7037, "lr": 3.4069157735018953e-06, "epoch": 16.974497681607417, "percentage": 84.87, "elapsed_time": "0:49:45", "remaining_time": "0:08:52", "throughput": 2477.19, "total_tokens": 7394480}
|
| 4410 |
+
{"current_steps": 21970, "total_steps": 25880, "loss": 4.7192, "lr": 3.3984239015382557e-06, "epoch": 16.97836166924266, "percentage": 84.89, "elapsed_time": "0:49:45", "remaining_time": "0:08:51", "throughput": 2477.18, "total_tokens": 7396080}
|
| 4411 |
+
{"current_steps": 21975, "total_steps": 25880, "loss": 4.5416, "lr": 3.3899418541007947e-06, "epoch": 16.982225656877898, "percentage": 84.91, "elapsed_time": "0:49:46", "remaining_time": "0:08:50", "throughput": 2477.18, "total_tokens": 7397680}
|
| 4412 |
+
{"current_steps": 21980, "total_steps": 25880, "loss": 4.7053, "lr": 3.381469635047191e-06, "epoch": 16.986089644513136, "percentage": 84.93, "elapsed_time": "0:49:46", "remaining_time": "0:08:49", "throughput": 2477.22, "total_tokens": 7399344}
|
| 4413 |
+
{"current_steps": 21985, "total_steps": 25880, "loss": 4.5564, "lr": 3.3730072482306697e-06, "epoch": 16.989953632148378, "percentage": 84.95, "elapsed_time": "0:49:47", "remaining_time": "0:08:49", "throughput": 2477.26, "total_tokens": 7401008}
|
| 4414 |
+
{"current_steps": 21990, "total_steps": 25880, "loss": 4.7937, "lr": 3.3645546974999636e-06, "epoch": 16.993817619783616, "percentage": 84.97, "elapsed_time": "0:49:48", "remaining_time": "0:08:48", "throughput": 2477.27, "total_tokens": 7402576}
|
| 4415 |
+
{"current_steps": 21995, "total_steps": 25880, "loss": 4.7687, "lr": 3.356111986699359e-06, "epoch": 16.997681607418855, "percentage": 84.99, "elapsed_time": "0:49:48", "remaining_time": "0:08:47", "throughput": 2477.26, "total_tokens": 7404144}
|
| 4416 |
+
{"current_steps": 21998, "total_steps": 25880, "eval_loss": 4.679266929626465, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:49:55", "remaining_time": "0:08:48", "throughput": 2472.23, "total_tokens": 7405056}
|
| 4417 |
+
{"current_steps": 22000, "total_steps": 25880, "loss": 4.4333, "lr": 3.3476791196686426e-06, "epoch": 17.001545595054097, "percentage": 85.01, "elapsed_time": "0:49:56", "remaining_time": "0:08:48", "throughput": 2471.42, "total_tokens": 7405952}
|
| 4418 |
+
{"current_steps": 22005, "total_steps": 25880, "loss": 4.748, "lr": 3.3392561002431323e-06, "epoch": 17.005409582689335, "percentage": 85.03, "elapsed_time": "0:49:57", "remaining_time": "0:08:47", "throughput": 2471.45, "total_tokens": 7407776}
|
| 4419 |
+
{"current_steps": 22010, "total_steps": 25880, "loss": 4.8291, "lr": 3.3308429322536692e-06, "epoch": 17.009273570324574, "percentage": 85.05, "elapsed_time": "0:49:58", "remaining_time": "0:08:47", "throughput": 2471.43, "total_tokens": 7409344}
|
| 4420 |
+
{"current_steps": 22015, "total_steps": 25880, "loss": 4.6737, "lr": 3.3224396195266127e-06, "epoch": 17.013137557959816, "percentage": 85.07, "elapsed_time": "0:49:58", "remaining_time": "0:08:46", "throughput": 2471.47, "total_tokens": 7411104}
|
| 4421 |
+
{"current_steps": 22020, "total_steps": 25880, "loss": 4.4682, "lr": 3.31404616588383e-06, "epoch": 17.017001545595054, "percentage": 85.09, "elapsed_time": "0:49:59", "remaining_time": "0:08:45", "throughput": 2471.5, "total_tokens": 7412832}
|
| 4422 |
+
{"current_steps": 22025, "total_steps": 25880, "loss": 4.7041, "lr": 3.3056625751427317e-06, "epoch": 17.020865533230292, "percentage": 85.1, "elapsed_time": "0:50:00", "remaining_time": "0:08:45", "throughput": 2471.52, "total_tokens": 7414656}
|
| 4423 |
+
{"current_steps": 22030, "total_steps": 25880, "loss": 5.0235, "lr": 3.297288851116212e-06, "epoch": 17.024729520865534, "percentage": 85.12, "elapsed_time": "0:50:00", "remaining_time": "0:08:44", "throughput": 2471.47, "total_tokens": 7416384}
|
| 4424 |
+
{"current_steps": 22035, "total_steps": 25880, "loss": 4.9242, "lr": 3.2889249976126995e-06, "epoch": 17.028593508500773, "percentage": 85.14, "elapsed_time": "0:50:01", "remaining_time": "0:08:43", "throughput": 2471.44, "total_tokens": 7417824}
|