Training in progress, step 15694
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +166 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7d51d738cbb5a567a4826fbdc9266200e484b0ef298364fc0489ea62e61b09a
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -2992,3 +2992,169 @@
|
|
| 2992 |
{"current_steps": 14870, "total_steps": 16520, "loss": 0.1555, "lr": 1.5058782924478431e-06, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:40:39", "remaining_time": "0:04:30", "throughput": 2498.97, "total_tokens": 6096512}
|
| 2993 |
{"current_steps": 14875, "total_steps": 16520, "loss": 0.0721, "lr": 1.4968630887529339e-06, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:40:40", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6098624}
|
| 2994 |
{"current_steps": 14880, "total_steps": 16520, "loss": 0.0541, "lr": 1.4878741188118744e-06, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:40:41", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6100736}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2992 |
{"current_steps": 14870, "total_steps": 16520, "loss": 0.1555, "lr": 1.5058782924478431e-06, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:40:39", "remaining_time": "0:04:30", "throughput": 2498.97, "total_tokens": 6096512}
|
| 2993 |
{"current_steps": 14875, "total_steps": 16520, "loss": 0.0721, "lr": 1.4968630887529339e-06, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:40:40", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6098624}
|
| 2994 |
{"current_steps": 14880, "total_steps": 16520, "loss": 0.0541, "lr": 1.4878741188118744e-06, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:40:41", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6100736}
|
| 2995 |
+
{"current_steps": 14885, "total_steps": 16520, "loss": 0.1131, "lr": 1.478911392657989e-06, "epoch": 18.020581113801452, "percentage": 90.1, "elapsed_time": "0:40:42", "remaining_time": "0:04:28", "throughput": 2498.91, "total_tokens": 6102880}
|
| 2996 |
+
{"current_steps": 14890, "total_steps": 16520, "loss": 0.115, "lr": 1.469974920295289e-06, "epoch": 18.026634382566584, "percentage": 90.13, "elapsed_time": "0:40:42", "remaining_time": "0:04:27", "throughput": 2498.92, "total_tokens": 6104864}
|
| 2997 |
+
{"current_steps": 14895, "total_steps": 16520, "loss": 0.0471, "lr": 1.4610647116985037e-06, "epoch": 18.03268765133172, "percentage": 90.16, "elapsed_time": "0:40:43", "remaining_time": "0:04:26", "throughput": 2498.94, "total_tokens": 6106944}
|
| 2998 |
+
{"current_steps": 14900, "total_steps": 16520, "loss": 0.0833, "lr": 1.4521807768130364e-06, "epoch": 18.038740920096853, "percentage": 90.19, "elapsed_time": "0:40:44", "remaining_time": "0:04:25", "throughput": 2499.0, "total_tokens": 6109056}
|
| 2999 |
+
{"current_steps": 14905, "total_steps": 16520, "loss": 0.1159, "lr": 1.4433231255549655e-06, "epoch": 18.044794188861985, "percentage": 90.22, "elapsed_time": "0:40:45", "remaining_time": "0:04:24", "throughput": 2499.03, "total_tokens": 6111168}
|
| 3000 |
+
{"current_steps": 14910, "total_steps": 16520, "loss": 0.0544, "lr": 1.4344917678110303e-06, "epoch": 18.050847457627118, "percentage": 90.25, "elapsed_time": "0:40:46", "remaining_time": "0:04:24", "throughput": 2499.04, "total_tokens": 6113152}
|
| 3001 |
+
{"current_steps": 14915, "total_steps": 16520, "loss": 0.0354, "lr": 1.4256867134386288e-06, "epoch": 18.05690072639225, "percentage": 90.28, "elapsed_time": "0:40:46", "remaining_time": "0:04:23", "throughput": 2499.11, "total_tokens": 6115104}
|
| 3002 |
+
{"current_steps": 14920, "total_steps": 16520, "loss": 0.1142, "lr": 1.416907972265788e-06, "epoch": 18.062953995157386, "percentage": 90.31, "elapsed_time": "0:40:47", "remaining_time": "0:04:22", "throughput": 2499.15, "total_tokens": 6117088}
|
| 3003 |
+
{"current_steps": 14925, "total_steps": 16520, "loss": 0.1497, "lr": 1.408155554091184e-06, "epoch": 18.06900726392252, "percentage": 90.35, "elapsed_time": "0:40:48", "remaining_time": "0:04:21", "throughput": 2499.2, "total_tokens": 6119200}
|
| 3004 |
+
{"current_steps": 14930, "total_steps": 16520, "loss": 0.0493, "lr": 1.3994294686840853e-06, "epoch": 18.07506053268765, "percentage": 90.38, "elapsed_time": "0:40:49", "remaining_time": "0:04:20", "throughput": 2499.22, "total_tokens": 6121280}
|
| 3005 |
+
{"current_steps": 14935, "total_steps": 16520, "loss": 0.1669, "lr": 1.3907297257843898e-06, "epoch": 18.081113801452783, "percentage": 90.41, "elapsed_time": "0:40:50", "remaining_time": "0:04:20", "throughput": 2499.24, "total_tokens": 6123456}
|
| 3006 |
+
{"current_steps": 14940, "total_steps": 16520, "loss": 0.0881, "lr": 1.3820563351025884e-06, "epoch": 18.087167070217916, "percentage": 90.44, "elapsed_time": "0:40:50", "remaining_time": "0:04:19", "throughput": 2499.24, "total_tokens": 6125568}
|
| 3007 |
+
{"current_steps": 14945, "total_steps": 16520, "loss": 0.1437, "lr": 1.3734093063197424e-06, "epoch": 18.093220338983052, "percentage": 90.47, "elapsed_time": "0:40:51", "remaining_time": "0:04:18", "throughput": 2499.26, "total_tokens": 6127488}
|
| 3008 |
+
{"current_steps": 14950, "total_steps": 16520, "loss": 0.1176, "lr": 1.3647886490875144e-06, "epoch": 18.099273607748184, "percentage": 90.5, "elapsed_time": "0:40:52", "remaining_time": "0:04:17", "throughput": 2499.31, "total_tokens": 6129472}
|
| 3009 |
+
{"current_steps": 14955, "total_steps": 16520, "loss": 0.0291, "lr": 1.3561943730281052e-06, "epoch": 18.105326876513317, "percentage": 90.53, "elapsed_time": "0:40:53", "remaining_time": "0:04:16", "throughput": 2499.3, "total_tokens": 6131488}
|
| 3010 |
+
{"current_steps": 14960, "total_steps": 16520, "loss": 0.1079, "lr": 1.3476264877342908e-06, "epoch": 18.11138014527845, "percentage": 90.56, "elapsed_time": "0:40:54", "remaining_time": "0:04:15", "throughput": 2499.28, "total_tokens": 6133408}
|
| 3011 |
+
{"current_steps": 14965, "total_steps": 16520, "loss": 0.0565, "lr": 1.3390850027693802e-06, "epoch": 18.11743341404358, "percentage": 90.59, "elapsed_time": "0:40:54", "remaining_time": "0:04:15", "throughput": 2499.33, "total_tokens": 6135424}
|
| 3012 |
+
{"current_steps": 14970, "total_steps": 16520, "loss": 0.0637, "lr": 1.3305699276672134e-06, "epoch": 18.123486682808718, "percentage": 90.62, "elapsed_time": "0:40:55", "remaining_time": "0:04:14", "throughput": 2499.37, "total_tokens": 6137472}
|
| 3013 |
+
{"current_steps": 14975, "total_steps": 16520, "loss": 0.1952, "lr": 1.3220812719321601e-06, "epoch": 18.12953995157385, "percentage": 90.65, "elapsed_time": "0:40:56", "remaining_time": "0:04:13", "throughput": 2499.42, "total_tokens": 6139552}
|
| 3014 |
+
{"current_steps": 14980, "total_steps": 16520, "loss": 0.1425, "lr": 1.3136190450390912e-06, "epoch": 18.135593220338983, "percentage": 90.68, "elapsed_time": "0:40:57", "remaining_time": "0:04:12", "throughput": 2499.45, "total_tokens": 6141536}
|
| 3015 |
+
{"current_steps": 14985, "total_steps": 16520, "loss": 0.0818, "lr": 1.3051832564333815e-06, "epoch": 18.141646489104115, "percentage": 90.71, "elapsed_time": "0:40:57", "remaining_time": "0:04:11", "throughput": 2499.5, "total_tokens": 6143552}
|
| 3016 |
+
{"current_steps": 14990, "total_steps": 16520, "loss": 0.1504, "lr": 1.2967739155309077e-06, "epoch": 18.147699757869248, "percentage": 90.74, "elapsed_time": "0:40:58", "remaining_time": "0:04:10", "throughput": 2499.55, "total_tokens": 6145632}
|
| 3017 |
+
{"current_steps": 14995, "total_steps": 16520, "loss": 0.0479, "lr": 1.2883910317180004e-06, "epoch": 18.153753026634384, "percentage": 90.77, "elapsed_time": "0:40:59", "remaining_time": "0:04:10", "throughput": 2499.55, "total_tokens": 6147680}
|
| 3018 |
+
{"current_steps": 15000, "total_steps": 16520, "loss": 0.1224, "lr": 1.2800346143514914e-06, "epoch": 18.159806295399516, "percentage": 90.8, "elapsed_time": "0:41:00", "remaining_time": "0:04:09", "throughput": 2499.61, "total_tokens": 6149792}
|
| 3019 |
+
{"current_steps": 15005, "total_steps": 16520, "loss": 0.0789, "lr": 1.2717046727586447e-06, "epoch": 18.16585956416465, "percentage": 90.83, "elapsed_time": "0:41:01", "remaining_time": "0:04:08", "throughput": 2499.64, "total_tokens": 6151744}
|
| 3020 |
+
{"current_steps": 15010, "total_steps": 16520, "loss": 0.1184, "lr": 1.2634012162371839e-06, "epoch": 18.17191283292978, "percentage": 90.86, "elapsed_time": "0:41:01", "remaining_time": "0:04:07", "throughput": 2499.67, "total_tokens": 6153856}
|
| 3021 |
+
{"current_steps": 15015, "total_steps": 16520, "loss": 0.0821, "lr": 1.2551242540552733e-06, "epoch": 18.177966101694917, "percentage": 90.89, "elapsed_time": "0:41:02", "remaining_time": "0:04:06", "throughput": 2499.68, "total_tokens": 6155840}
|
| 3022 |
+
{"current_steps": 15020, "total_steps": 16520, "loss": 0.113, "lr": 1.2468737954514948e-06, "epoch": 18.18401937046005, "percentage": 90.92, "elapsed_time": "0:41:03", "remaining_time": "0:04:06", "throughput": 2499.73, "total_tokens": 6157952}
|
| 3023 |
+
{"current_steps": 15025, "total_steps": 16520, "loss": 0.1893, "lr": 1.2386498496348541e-06, "epoch": 18.190072639225182, "percentage": 90.95, "elapsed_time": "0:41:04", "remaining_time": "0:04:05", "throughput": 2499.78, "total_tokens": 6159936}
|
| 3024 |
+
{"current_steps": 15030, "total_steps": 16520, "loss": 0.1303, "lr": 1.2304524257847672e-06, "epoch": 18.196125907990314, "percentage": 90.98, "elapsed_time": "0:41:04", "remaining_time": "0:04:04", "throughput": 2499.83, "total_tokens": 6162016}
|
| 3025 |
+
{"current_steps": 15035, "total_steps": 16520, "loss": 0.0892, "lr": 1.2222815330510367e-06, "epoch": 18.202179176755447, "percentage": 91.01, "elapsed_time": "0:41:05", "remaining_time": "0:04:03", "throughput": 2499.85, "total_tokens": 6164128}
|
| 3026 |
+
{"current_steps": 15040, "total_steps": 16520, "loss": 0.0638, "lr": 1.2141371805538593e-06, "epoch": 18.208232445520583, "percentage": 91.04, "elapsed_time": "0:41:06", "remaining_time": "0:04:02", "throughput": 2499.92, "total_tokens": 6166336}
|
| 3027 |
+
{"current_steps": 15045, "total_steps": 16520, "loss": 0.0868, "lr": 1.206019377383813e-06, "epoch": 18.214285714285715, "percentage": 91.07, "elapsed_time": "0:41:07", "remaining_time": "0:04:01", "throughput": 2499.96, "total_tokens": 6168384}
|
| 3028 |
+
{"current_steps": 15050, "total_steps": 16520, "loss": 0.1727, "lr": 1.197928132601825e-06, "epoch": 18.220338983050848, "percentage": 91.1, "elapsed_time": "0:41:08", "remaining_time": "0:04:01", "throughput": 2500.0, "total_tokens": 6170432}
|
| 3029 |
+
{"current_steps": 15055, "total_steps": 16520, "loss": 0.1405, "lr": 1.189863455239193e-06, "epoch": 18.22639225181598, "percentage": 91.13, "elapsed_time": "0:41:08", "remaining_time": "0:04:00", "throughput": 2500.04, "total_tokens": 6172416}
|
| 3030 |
+
{"current_steps": 15060, "total_steps": 16520, "loss": 0.0561, "lr": 1.1818253542975584e-06, "epoch": 18.232445520581113, "percentage": 91.16, "elapsed_time": "0:41:09", "remaining_time": "0:03:59", "throughput": 2500.1, "total_tokens": 6174336}
|
| 3031 |
+
{"current_steps": 15065, "total_steps": 16520, "loss": 0.0776, "lr": 1.173813838748894e-06, "epoch": 18.23849878934625, "percentage": 91.19, "elapsed_time": "0:41:10", "remaining_time": "0:03:58", "throughput": 2500.13, "total_tokens": 6176352}
|
| 3032 |
+
{"current_steps": 15070, "total_steps": 16520, "loss": 0.107, "lr": 1.1658289175354996e-06, "epoch": 18.24455205811138, "percentage": 91.22, "elapsed_time": "0:41:11", "remaining_time": "0:03:57", "throughput": 2500.18, "total_tokens": 6178272}
|
| 3033 |
+
{"current_steps": 15075, "total_steps": 16520, "loss": 0.1425, "lr": 1.1578705995699961e-06, "epoch": 18.250605326876514, "percentage": 91.25, "elapsed_time": "0:41:11", "remaining_time": "0:03:56", "throughput": 2500.19, "total_tokens": 6180320}
|
| 3034 |
+
{"current_steps": 15080, "total_steps": 16520, "loss": 0.1206, "lr": 1.1499388937352974e-06, "epoch": 18.256658595641646, "percentage": 91.28, "elapsed_time": "0:41:12", "remaining_time": "0:03:56", "throughput": 2500.23, "total_tokens": 6182368}
|
| 3035 |
+
{"current_steps": 15085, "total_steps": 16520, "loss": 0.1395, "lr": 1.1420338088846404e-06, "epoch": 18.26271186440678, "percentage": 91.31, "elapsed_time": "0:41:13", "remaining_time": "0:03:55", "throughput": 2500.23, "total_tokens": 6184480}
|
| 3036 |
+
{"current_steps": 15090, "total_steps": 16520, "loss": 0.123, "lr": 1.1341553538415095e-06, "epoch": 18.268765133171915, "percentage": 91.34, "elapsed_time": "0:41:14", "remaining_time": "0:03:54", "throughput": 2500.28, "total_tokens": 6186656}
|
| 3037 |
+
{"current_steps": 15095, "total_steps": 16520, "loss": 0.0486, "lr": 1.1263035373997033e-06, "epoch": 18.274818401937047, "percentage": 91.37, "elapsed_time": "0:41:15", "remaining_time": "0:03:53", "throughput": 2500.28, "total_tokens": 6188704}
|
| 3038 |
+
{"current_steps": 15100, "total_steps": 16520, "loss": 0.0732, "lr": 1.1184783683232585e-06, "epoch": 18.28087167070218, "percentage": 91.4, "elapsed_time": "0:41:15", "remaining_time": "0:03:52", "throughput": 2500.35, "total_tokens": 6190656}
|
| 3039 |
+
{"current_steps": 15105, "total_steps": 16520, "loss": 0.0979, "lr": 1.1106798553464804e-06, "epoch": 18.286924939467312, "percentage": 91.43, "elapsed_time": "0:41:16", "remaining_time": "0:03:52", "throughput": 2500.38, "total_tokens": 6192768}
|
| 3040 |
+
{"current_steps": 15110, "total_steps": 16520, "loss": 0.134, "lr": 1.1029080071739333e-06, "epoch": 18.292978208232444, "percentage": 91.46, "elapsed_time": "0:41:17", "remaining_time": "0:03:51", "throughput": 2500.4, "total_tokens": 6194784}
|
| 3041 |
+
{"current_steps": 15115, "total_steps": 16520, "loss": 0.0796, "lr": 1.095162832480387e-06, "epoch": 18.29903147699758, "percentage": 91.5, "elapsed_time": "0:41:18", "remaining_time": "0:03:50", "throughput": 2500.43, "total_tokens": 6196896}
|
| 3042 |
+
{"current_steps": 15120, "total_steps": 16520, "loss": 0.1114, "lr": 1.0874443399108702e-06, "epoch": 18.305084745762713, "percentage": 91.53, "elapsed_time": "0:41:19", "remaining_time": "0:03:49", "throughput": 2500.49, "total_tokens": 6198944}
|
| 3043 |
+
{"current_steps": 15125, "total_steps": 16520, "loss": 0.1043, "lr": 1.0797525380806168e-06, "epoch": 18.311138014527845, "percentage": 91.56, "elapsed_time": "0:41:19", "remaining_time": "0:03:48", "throughput": 2500.52, "total_tokens": 6201056}
|
| 3044 |
+
{"current_steps": 15130, "total_steps": 16520, "loss": 0.0658, "lr": 1.072087435575067e-06, "epoch": 18.317191283292978, "percentage": 91.59, "elapsed_time": "0:41:20", "remaining_time": "0:03:47", "throughput": 2500.58, "total_tokens": 6203168}
|
| 3045 |
+
{"current_steps": 15135, "total_steps": 16520, "loss": 0.1016, "lr": 1.0644490409498637e-06, "epoch": 18.32324455205811, "percentage": 91.62, "elapsed_time": "0:41:21", "remaining_time": "0:03:47", "throughput": 2500.6, "total_tokens": 6205248}
|
| 3046 |
+
{"current_steps": 15140, "total_steps": 16520, "loss": 0.0944, "lr": 1.0568373627308365e-06, "epoch": 18.329297820823246, "percentage": 91.65, "elapsed_time": "0:41:22", "remaining_time": "0:03:46", "throughput": 2500.6, "total_tokens": 6207296}
|
| 3047 |
+
{"current_steps": 15145, "total_steps": 16520, "loss": 0.1063, "lr": 1.0492524094139921e-06, "epoch": 18.33535108958838, "percentage": 91.68, "elapsed_time": "0:41:23", "remaining_time": "0:03:45", "throughput": 2500.65, "total_tokens": 6209472}
|
| 3048 |
+
{"current_steps": 15150, "total_steps": 16520, "loss": 0.1474, "lr": 1.0416941894655224e-06, "epoch": 18.34140435835351, "percentage": 91.71, "elapsed_time": "0:41:23", "remaining_time": "0:03:44", "throughput": 2500.71, "total_tokens": 6211488}
|
| 3049 |
+
{"current_steps": 15155, "total_steps": 16520, "loss": 0.1026, "lr": 1.0341627113217539e-06, "epoch": 18.347457627118644, "percentage": 91.74, "elapsed_time": "0:41:24", "remaining_time": "0:03:43", "throughput": 2500.75, "total_tokens": 6213536}
|
| 3050 |
+
{"current_steps": 15160, "total_steps": 16520, "loss": 0.1285, "lr": 1.026657983389187e-06, "epoch": 18.353510895883776, "percentage": 91.77, "elapsed_time": "0:41:25", "remaining_time": "0:03:42", "throughput": 2500.79, "total_tokens": 6215744}
|
| 3051 |
+
{"current_steps": 15165, "total_steps": 16520, "loss": 0.0783, "lr": 1.0191800140444574e-06, "epoch": 18.359564164648912, "percentage": 91.8, "elapsed_time": "0:41:26", "remaining_time": "0:03:42", "throughput": 2500.81, "total_tokens": 6217824}
|
| 3052 |
+
{"current_steps": 15170, "total_steps": 16520, "loss": 0.1054, "lr": 1.0117288116343298e-06, "epoch": 18.365617433414045, "percentage": 91.83, "elapsed_time": "0:41:27", "remaining_time": "0:03:41", "throughput": 2500.81, "total_tokens": 6219872}
|
| 3053 |
+
{"current_steps": 15175, "total_steps": 16520, "loss": 0.055, "lr": 1.0043043844756934e-06, "epoch": 18.371670702179177, "percentage": 91.86, "elapsed_time": "0:41:27", "remaining_time": "0:03:40", "throughput": 2500.84, "total_tokens": 6221888}
|
| 3054 |
+
{"current_steps": 15180, "total_steps": 16520, "loss": 0.2029, "lr": 9.96906740855555e-07, "epoch": 18.37772397094431, "percentage": 91.89, "elapsed_time": "0:41:28", "remaining_time": "0:03:39", "throughput": 2500.91, "total_tokens": 6223936}
|
| 3055 |
+
{"current_steps": 15185, "total_steps": 16520, "loss": 0.0654, "lr": 9.895358890310208e-07, "epoch": 18.383777239709442, "percentage": 91.92, "elapsed_time": "0:41:29", "remaining_time": "0:03:38", "throughput": 2500.95, "total_tokens": 6225920}
|
| 3056 |
+
{"current_steps": 15190, "total_steps": 16520, "loss": 0.1272, "lr": 9.821918372292959e-07, "epoch": 18.389830508474578, "percentage": 91.95, "elapsed_time": "0:41:30", "remaining_time": "0:03:38", "throughput": 2500.99, "total_tokens": 6228064}
|
| 3057 |
+
{"current_steps": 15195, "total_steps": 16520, "loss": 0.0596, "lr": 9.748745936476734e-07, "epoch": 18.39588377723971, "percentage": 91.98, "elapsed_time": "0:41:30", "remaining_time": "0:03:37", "throughput": 2501.02, "total_tokens": 6230016}
|
| 3058 |
+
{"current_steps": 15200, "total_steps": 16520, "loss": 0.1211, "lr": 9.675841664535167e-07, "epoch": 18.401937046004843, "percentage": 92.01, "elapsed_time": "0:41:31", "remaining_time": "0:03:36", "throughput": 2501.08, "total_tokens": 6232064}
|
| 3059 |
+
{"current_steps": 15205, "total_steps": 16520, "loss": 0.1236, "lr": 9.6032056378427e-07, "epoch": 18.407990314769975, "percentage": 92.04, "elapsed_time": "0:41:32", "remaining_time": "0:03:35", "throughput": 2501.12, "total_tokens": 6234112}
|
| 3060 |
+
{"current_steps": 15210, "total_steps": 16520, "loss": 0.0951, "lr": 9.53083793747414e-07, "epoch": 18.414043583535108, "percentage": 92.07, "elapsed_time": "0:41:33", "remaining_time": "0:03:34", "throughput": 2501.16, "total_tokens": 6236192}
|
| 3061 |
+
{"current_steps": 15215, "total_steps": 16520, "loss": 0.0411, "lr": 9.458738644205129e-07, "epoch": 18.420096852300244, "percentage": 92.1, "elapsed_time": "0:41:34", "remaining_time": "0:03:33", "throughput": 2501.24, "total_tokens": 6238368}
|
| 3062 |
+
{"current_steps": 15220, "total_steps": 16520, "loss": 0.1364, "lr": 9.386907838511344e-07, "epoch": 18.426150121065376, "percentage": 92.13, "elapsed_time": "0:41:34", "remaining_time": "0:03:33", "throughput": 2501.29, "total_tokens": 6240384}
|
| 3063 |
+
{"current_steps": 15225, "total_steps": 16520, "loss": 0.0797, "lr": 9.31534560056907e-07, "epoch": 18.43220338983051, "percentage": 92.16, "elapsed_time": "0:41:35", "remaining_time": "0:03:32", "throughput": 2501.32, "total_tokens": 6242496}
|
| 3064 |
+
{"current_steps": 15230, "total_steps": 16520, "loss": 0.1392, "lr": 9.244052010254662e-07, "epoch": 18.43825665859564, "percentage": 92.19, "elapsed_time": "0:41:36", "remaining_time": "0:03:31", "throughput": 2501.38, "total_tokens": 6244512}
|
| 3065 |
+
{"current_steps": 15235, "total_steps": 16520, "loss": 0.1566, "lr": 9.173027147144714e-07, "epoch": 18.444309927360774, "percentage": 92.22, "elapsed_time": "0:41:37", "remaining_time": "0:03:30", "throughput": 2501.43, "total_tokens": 6246688}
|
| 3066 |
+
{"current_steps": 15240, "total_steps": 16520, "loss": 0.0876, "lr": 9.102271090515784e-07, "epoch": 18.45036319612591, "percentage": 92.25, "elapsed_time": "0:41:38", "remaining_time": "0:03:29", "throughput": 2501.47, "total_tokens": 6248736}
|
| 3067 |
+
{"current_steps": 15245, "total_steps": 16520, "loss": 0.0659, "lr": 9.031783919344478e-07, "epoch": 18.456416464891042, "percentage": 92.28, "elapsed_time": "0:41:38", "remaining_time": "0:03:28", "throughput": 2501.53, "total_tokens": 6250944}
|
| 3068 |
+
{"current_steps": 15250, "total_steps": 16520, "loss": 0.0804, "lr": 8.961565712307163e-07, "epoch": 18.462469733656174, "percentage": 92.31, "elapsed_time": "0:41:39", "remaining_time": "0:03:28", "throughput": 2501.55, "total_tokens": 6253120}
|
| 3069 |
+
{"current_steps": 15255, "total_steps": 16520, "loss": 0.0788, "lr": 8.891616547780174e-07, "epoch": 18.468523002421307, "percentage": 92.34, "elapsed_time": "0:41:40", "remaining_time": "0:03:27", "throughput": 2501.6, "total_tokens": 6255264}
|
| 3070 |
+
{"current_steps": 15260, "total_steps": 16520, "loss": 0.1169, "lr": 8.821936503839334e-07, "epoch": 18.47457627118644, "percentage": 92.37, "elapsed_time": "0:41:41", "remaining_time": "0:03:26", "throughput": 2501.62, "total_tokens": 6257344}
|
| 3071 |
+
{"current_steps": 15265, "total_steps": 16520, "loss": 0.0922, "lr": 8.75252565826018e-07, "epoch": 18.480629539951575, "percentage": 92.4, "elapsed_time": "0:41:42", "remaining_time": "0:03:25", "throughput": 2501.66, "total_tokens": 6259328}
|
| 3072 |
+
{"current_steps": 15270, "total_steps": 16520, "loss": 0.0588, "lr": 8.683384088517904e-07, "epoch": 18.486682808716708, "percentage": 92.43, "elapsed_time": "0:41:42", "remaining_time": "0:03:24", "throughput": 2501.65, "total_tokens": 6261248}
|
| 3073 |
+
{"current_steps": 15275, "total_steps": 16520, "loss": 0.0759, "lr": 8.614511871786829e-07, "epoch": 18.49273607748184, "percentage": 92.46, "elapsed_time": "0:41:43", "remaining_time": "0:03:24", "throughput": 2501.7, "total_tokens": 6263168}
|
| 3074 |
+
{"current_steps": 15280, "total_steps": 16520, "loss": 0.157, "lr": 8.545909084940962e-07, "epoch": 18.498789346246973, "percentage": 92.49, "elapsed_time": "0:41:44", "remaining_time": "0:03:23", "throughput": 2501.73, "total_tokens": 6265216}
|
| 3075 |
+
{"current_steps": 15285, "total_steps": 16520, "loss": 0.0867, "lr": 8.477575804553356e-07, "epoch": 18.504842615012105, "percentage": 92.52, "elapsed_time": "0:41:45", "remaining_time": "0:03:22", "throughput": 2501.77, "total_tokens": 6267264}
|
| 3076 |
+
{"current_steps": 15290, "total_steps": 16520, "loss": 0.1281, "lr": 8.409512106896334e-07, "epoch": 18.51089588377724, "percentage": 92.55, "elapsed_time": "0:41:45", "remaining_time": "0:03:21", "throughput": 2501.81, "total_tokens": 6269344}
|
| 3077 |
+
{"current_steps": 15295, "total_steps": 16520, "loss": 0.0674, "lr": 8.34171806794129e-07, "epoch": 18.516949152542374, "percentage": 92.58, "elapsed_time": "0:41:46", "remaining_time": "0:03:20", "throughput": 2501.86, "total_tokens": 6271328}
|
| 3078 |
+
{"current_steps": 15300, "total_steps": 16520, "loss": 0.0681, "lr": 8.27419376335864e-07, "epoch": 18.523002421307506, "percentage": 92.62, "elapsed_time": "0:41:47", "remaining_time": "0:03:19", "throughput": 2501.92, "total_tokens": 6273248}
|
| 3079 |
+
{"current_steps": 15305, "total_steps": 16520, "loss": 0.111, "lr": 8.206939268517705e-07, "epoch": 18.52905569007264, "percentage": 92.65, "elapsed_time": "0:41:48", "remaining_time": "0:03:19", "throughput": 2501.94, "total_tokens": 6275328}
|
| 3080 |
+
{"current_steps": 15310, "total_steps": 16520, "loss": 0.1406, "lr": 8.139954658486771e-07, "epoch": 18.53510895883777, "percentage": 92.68, "elapsed_time": "0:41:48", "remaining_time": "0:03:18", "throughput": 2501.98, "total_tokens": 6277408}
|
| 3081 |
+
{"current_steps": 15315, "total_steps": 16520, "loss": 0.1418, "lr": 8.07324000803264e-07, "epoch": 18.541162227602907, "percentage": 92.71, "elapsed_time": "0:41:49", "remaining_time": "0:03:17", "throughput": 2502.01, "total_tokens": 6279328}
|
| 3082 |
+
{"current_steps": 15320, "total_steps": 16520, "loss": 0.0458, "lr": 8.006795391621053e-07, "epoch": 18.54721549636804, "percentage": 92.74, "elapsed_time": "0:41:50", "remaining_time": "0:03:16", "throughput": 2502.05, "total_tokens": 6281376}
|
| 3083 |
+
{"current_steps": 15325, "total_steps": 16520, "loss": 0.1008, "lr": 7.940620883416155e-07, "epoch": 18.553268765133172, "percentage": 92.77, "elapsed_time": "0:41:51", "remaining_time": "0:03:15", "throughput": 2502.07, "total_tokens": 6283392}
|
| 3084 |
+
{"current_steps": 15330, "total_steps": 16520, "loss": 0.1188, "lr": 7.874716557280698e-07, "epoch": 18.559322033898304, "percentage": 92.8, "elapsed_time": "0:41:52", "remaining_time": "0:03:15", "throughput": 2502.14, "total_tokens": 6285632}
|
| 3085 |
+
{"current_steps": 15335, "total_steps": 16520, "loss": 0.1077, "lr": 7.809082486775838e-07, "epoch": 18.565375302663437, "percentage": 92.83, "elapsed_time": "0:41:52", "remaining_time": "0:03:14", "throughput": 2502.17, "total_tokens": 6287680}
|
| 3086 |
+
{"current_steps": 15340, "total_steps": 16520, "loss": 0.1117, "lr": 7.743718745161083e-07, "epoch": 18.571428571428573, "percentage": 92.86, "elapsed_time": "0:41:53", "remaining_time": "0:03:13", "throughput": 2502.25, "total_tokens": 6289760}
|
| 3087 |
+
{"current_steps": 15345, "total_steps": 16520, "loss": 0.0793, "lr": 7.678625405394157e-07, "epoch": 18.577481840193705, "percentage": 92.89, "elapsed_time": "0:41:54", "remaining_time": "0:03:12", "throughput": 2502.29, "total_tokens": 6291808}
|
| 3088 |
+
{"current_steps": 15350, "total_steps": 16520, "loss": 0.1208, "lr": 7.613802540131054e-07, "epoch": 18.583535108958838, "percentage": 92.92, "elapsed_time": "0:41:55", "remaining_time": "0:03:11", "throughput": 2502.32, "total_tokens": 6293920}
|
| 3089 |
+
{"current_steps": 15355, "total_steps": 16520, "loss": 0.1002, "lr": 7.549250221725784e-07, "epoch": 18.58958837772397, "percentage": 92.95, "elapsed_time": "0:41:56", "remaining_time": "0:03:10", "throughput": 2502.33, "total_tokens": 6295904}
|
| 3090 |
+
{"current_steps": 15360, "total_steps": 16520, "loss": 0.0285, "lr": 7.484968522230434e-07, "epoch": 18.595641646489103, "percentage": 92.98, "elapsed_time": "0:41:56", "remaining_time": "0:03:10", "throughput": 2502.36, "total_tokens": 6297920}
|
| 3091 |
+
{"current_steps": 15365, "total_steps": 16520, "loss": 0.0718, "lr": 7.420957513395027e-07, "epoch": 18.60169491525424, "percentage": 93.01, "elapsed_time": "0:41:57", "remaining_time": "0:03:09", "throughput": 2502.39, "total_tokens": 6300064}
|
| 3092 |
+
{"current_steps": 15370, "total_steps": 16520, "loss": 0.0574, "lr": 7.357217266667355e-07, "epoch": 18.60774818401937, "percentage": 93.04, "elapsed_time": "0:41:58", "remaining_time": "0:03:08", "throughput": 2502.43, "total_tokens": 6302016}
|
| 3093 |
+
{"current_steps": 15375, "total_steps": 16520, "loss": 0.0671, "lr": 7.293747853193201e-07, "epoch": 18.613801452784504, "percentage": 93.07, "elapsed_time": "0:41:59", "remaining_time": "0:03:07", "throughput": 2502.49, "total_tokens": 6304064}
|
| 3094 |
+
{"current_steps": 15380, "total_steps": 16520, "loss": 0.1402, "lr": 7.230549343815813e-07, "epoch": 18.619854721549636, "percentage": 93.1, "elapsed_time": "0:41:59", "remaining_time": "0:03:06", "throughput": 2502.5, "total_tokens": 6305888}
|
| 3095 |
+
{"current_steps": 15385, "total_steps": 16520, "loss": 0.0822, "lr": 7.16762180907618e-07, "epoch": 18.62590799031477, "percentage": 93.13, "elapsed_time": "0:42:00", "remaining_time": "0:03:05", "throughput": 2502.51, "total_tokens": 6307872}
|
| 3096 |
+
{"current_steps": 15390, "total_steps": 16520, "loss": 0.0771, "lr": 7.10496531921287e-07, "epoch": 18.631961259079905, "percentage": 93.16, "elapsed_time": "0:42:01", "remaining_time": "0:03:05", "throughput": 2502.56, "total_tokens": 6310048}
|
| 3097 |
+
{"current_steps": 15395, "total_steps": 16520, "loss": 0.1386, "lr": 7.042579944161797e-07, "epoch": 18.638014527845037, "percentage": 93.19, "elapsed_time": "0:42:02", "remaining_time": "0:03:04", "throughput": 2502.57, "total_tokens": 6312128}
|
| 3098 |
+
{"current_steps": 15400, "total_steps": 16520, "loss": 0.0731, "lr": 6.980465753556376e-07, "epoch": 18.64406779661017, "percentage": 93.22, "elapsed_time": "0:42:03", "remaining_time": "0:03:03", "throughput": 2502.63, "total_tokens": 6314144}
|
| 3099 |
+
{"current_steps": 15405, "total_steps": 16520, "loss": 0.1487, "lr": 6.918622816727255e-07, "epoch": 18.650121065375302, "percentage": 93.25, "elapsed_time": "0:42:03", "remaining_time": "0:03:02", "throughput": 2502.69, "total_tokens": 6316192}
|
| 3100 |
+
{"current_steps": 15410, "total_steps": 16520, "loss": 0.0903, "lr": 6.85705120270233e-07, "epoch": 18.656174334140434, "percentage": 93.28, "elapsed_time": "0:42:04", "remaining_time": "0:03:01", "throughput": 2502.71, "total_tokens": 6318112}
|
| 3101 |
+
{"current_steps": 15415, "total_steps": 16520, "loss": 0.1183, "lr": 6.795750980206711e-07, "epoch": 18.66222760290557, "percentage": 93.31, "elapsed_time": "0:42:05", "remaining_time": "0:03:01", "throughput": 2502.72, "total_tokens": 6320256}
|
| 3102 |
+
{"current_steps": 15420, "total_steps": 16520, "loss": 0.1214, "lr": 6.734722217662526e-07, "epoch": 18.668280871670703, "percentage": 93.34, "elapsed_time": "0:42:06", "remaining_time": "0:03:00", "throughput": 2502.74, "total_tokens": 6322336}
|
| 3103 |
+
{"current_steps": 15425, "total_steps": 16520, "loss": 0.1038, "lr": 6.673964983188868e-07, "epoch": 18.674334140435835, "percentage": 93.37, "elapsed_time": "0:42:06", "remaining_time": "0:02:59", "throughput": 2502.8, "total_tokens": 6324288}
|
| 3104 |
+
{"current_steps": 15430, "total_steps": 16520, "loss": 0.0616, "lr": 6.613479344601881e-07, "epoch": 18.680387409200968, "percentage": 93.4, "elapsed_time": "0:42:07", "remaining_time": "0:02:58", "throughput": 2502.86, "total_tokens": 6326336}
|
| 3105 |
+
{"current_steps": 15435, "total_steps": 16520, "loss": 0.077, "lr": 6.553265369414419e-07, "epoch": 18.6864406779661, "percentage": 93.43, "elapsed_time": "0:42:08", "remaining_time": "0:02:57", "throughput": 2502.93, "total_tokens": 6328608}
|
| 3106 |
+
{"current_steps": 15440, "total_steps": 16520, "loss": 0.1096, "lr": 6.493323124836193e-07, "epoch": 18.692493946731236, "percentage": 93.46, "elapsed_time": "0:42:09", "remaining_time": "0:02:56", "throughput": 2502.94, "total_tokens": 6330592}
|
| 3107 |
+
{"current_steps": 15445, "total_steps": 16520, "loss": 0.0953, "lr": 6.433652677773627e-07, "epoch": 18.69854721549637, "percentage": 93.49, "elapsed_time": "0:42:09", "remaining_time": "0:02:56", "throughput": 2502.96, "total_tokens": 6332352}
|
| 3108 |
+
{"current_steps": 15450, "total_steps": 16520, "loss": 0.1658, "lr": 6.374254094829723e-07, "epoch": 18.7046004842615, "percentage": 93.52, "elapsed_time": "0:42:10", "remaining_time": "0:02:55", "throughput": 2502.96, "total_tokens": 6334464}
|
| 3109 |
+
{"current_steps": 15455, "total_steps": 16520, "loss": 0.0888, "lr": 6.315127442304003e-07, "epoch": 18.710653753026634, "percentage": 93.55, "elapsed_time": "0:42:11", "remaining_time": "0:02:54", "throughput": 2502.99, "total_tokens": 6336576}
|
| 3110 |
+
{"current_steps": 15460, "total_steps": 16520, "loss": 0.1595, "lr": 6.256272786192563e-07, "epoch": 18.716707021791766, "percentage": 93.58, "elapsed_time": "0:42:12", "remaining_time": "0:02:53", "throughput": 2503.02, "total_tokens": 6338720}
|
| 3111 |
+
{"current_steps": 15465, "total_steps": 16520, "loss": 0.0484, "lr": 6.197690192187827e-07, "epoch": 18.722760290556902, "percentage": 93.61, "elapsed_time": "0:42:13", "remaining_time": "0:02:52", "throughput": 2503.06, "total_tokens": 6340608}
|
| 3112 |
+
{"current_steps": 15470, "total_steps": 16520, "loss": 0.1598, "lr": 6.139379725678602e-07, "epoch": 18.728813559322035, "percentage": 93.64, "elapsed_time": "0:42:13", "remaining_time": "0:02:51", "throughput": 2503.08, "total_tokens": 6342624}
|
| 3113 |
+
{"current_steps": 15475, "total_steps": 16520, "loss": 0.078, "lr": 6.08134145174985e-07, "epoch": 18.734866828087167, "percentage": 93.67, "elapsed_time": "0:42:14", "remaining_time": "0:02:51", "throughput": 2503.15, "total_tokens": 6344672}
|
| 3114 |
+
{"current_steps": 15480, "total_steps": 16520, "loss": 0.1623, "lr": 6.023575435182865e-07, "epoch": 18.7409200968523, "percentage": 93.7, "elapsed_time": "0:42:15", "remaining_time": "0:02:50", "throughput": 2503.19, "total_tokens": 6346816}
|
| 3115 |
+
{"current_steps": 15485, "total_steps": 16520, "loss": 0.0856, "lr": 5.966081740454932e-07, "epoch": 18.746973365617432, "percentage": 93.73, "elapsed_time": "0:42:16", "remaining_time": "0:02:49", "throughput": 2503.18, "total_tokens": 6348768}
|
| 3116 |
+
{"current_steps": 15490, "total_steps": 16520, "loss": 0.176, "lr": 5.90886043173941e-07, "epoch": 18.753026634382568, "percentage": 93.77, "elapsed_time": "0:42:17", "remaining_time": "0:02:48", "throughput": 2503.2, "total_tokens": 6350848}
|
| 3117 |
+
{"current_steps": 15495, "total_steps": 16520, "loss": 0.1083, "lr": 5.851911572905711e-07, "epoch": 18.7590799031477, "percentage": 93.8, "elapsed_time": "0:42:17", "remaining_time": "0:02:47", "throughput": 2503.2, "total_tokens": 6352960}
|
| 3118 |
+
{"current_steps": 15500, "total_steps": 16520, "loss": 0.1052, "lr": 5.79523522751893e-07, "epoch": 18.765133171912833, "percentage": 93.83, "elapsed_time": "0:42:18", "remaining_time": "0:02:47", "throughput": 2503.24, "total_tokens": 6355200}
|
| 3119 |
+
{"current_steps": 15505, "total_steps": 16520, "loss": 0.1273, "lr": 5.738831458840243e-07, "epoch": 18.771186440677965, "percentage": 93.86, "elapsed_time": "0:42:19", "remaining_time": "0:02:46", "throughput": 2503.26, "total_tokens": 6357280}
|
| 3120 |
+
{"current_steps": 15510, "total_steps": 16520, "loss": 0.1265, "lr": 5.682700329826401e-07, "epoch": 18.777239709443098, "percentage": 93.89, "elapsed_time": "0:42:20", "remaining_time": "0:02:45", "throughput": 2503.3, "total_tokens": 6359328}
|
| 3121 |
+
{"current_steps": 15515, "total_steps": 16520, "loss": 0.0685, "lr": 5.626841903129954e-07, "epoch": 18.783292978208234, "percentage": 93.92, "elapsed_time": "0:42:21", "remaining_time": "0:02:44", "throughput": 2503.35, "total_tokens": 6361312}
|
| 3122 |
+
{"current_steps": 15520, "total_steps": 16520, "loss": 0.1262, "lr": 5.571256241098943e-07, "epoch": 18.789346246973366, "percentage": 93.95, "elapsed_time": "0:42:21", "remaining_time": "0:02:43", "throughput": 2503.35, "total_tokens": 6363360}
|
| 3123 |
+
{"current_steps": 15525, "total_steps": 16520, "loss": 0.074, "lr": 5.515943405777102e-07, "epoch": 18.7953995157385, "percentage": 93.98, "elapsed_time": "0:42:22", "remaining_time": "0:02:42", "throughput": 2503.43, "total_tokens": 6365344}
|
| 3124 |
+
{"current_steps": 15530, "total_steps": 16520, "loss": 0.1522, "lr": 5.460903458903488e-07, "epoch": 18.80145278450363, "percentage": 94.01, "elapsed_time": "0:42:23", "remaining_time": "0:02:42", "throughput": 2503.47, "total_tokens": 6367328}
|
| 3125 |
+
{"current_steps": 15535, "total_steps": 16520, "loss": 0.0787, "lr": 5.406136461912709e-07, "epoch": 18.807506053268764, "percentage": 94.04, "elapsed_time": "0:42:24", "remaining_time": "0:02:41", "throughput": 2503.49, "total_tokens": 6369408}
|
| 3126 |
+
{"current_steps": 15540, "total_steps": 16520, "loss": 0.1408, "lr": 5.351642475934587e-07, "epoch": 18.8135593220339, "percentage": 94.07, "elapsed_time": "0:42:25", "remaining_time": "0:02:40", "throughput": 2503.54, "total_tokens": 6371584}
|
| 3127 |
+
{"current_steps": 15545, "total_steps": 16520, "loss": 0.0928, "lr": 5.29742156179433e-07, "epoch": 18.819612590799032, "percentage": 94.1, "elapsed_time": "0:42:25", "remaining_time": "0:02:39", "throughput": 2503.58, "total_tokens": 6373664}
|
| 3128 |
+
{"current_steps": 15550, "total_steps": 16520, "loss": 0.0964, "lr": 5.243473780012248e-07, "epoch": 18.825665859564165, "percentage": 94.13, "elapsed_time": "0:42:26", "remaining_time": "0:02:38", "throughput": 2503.63, "total_tokens": 6375648}
|
| 3129 |
+
{"current_steps": 15555, "total_steps": 16520, "loss": 0.0714, "lr": 5.18979919080384e-07, "epoch": 18.831719128329297, "percentage": 94.16, "elapsed_time": "0:42:27", "remaining_time": "0:02:38", "throughput": 2503.66, "total_tokens": 6377792}
|
| 3130 |
+
{"current_steps": 15560, "total_steps": 16520, "loss": 0.1585, "lr": 5.136397854079655e-07, "epoch": 18.83777239709443, "percentage": 94.19, "elapsed_time": "0:42:28", "remaining_time": "0:02:37", "throughput": 2503.71, "total_tokens": 6379968}
|
| 3131 |
+
{"current_steps": 15565, "total_steps": 16520, "loss": 0.1043, "lr": 5.083269829445236e-07, "epoch": 18.843825665859566, "percentage": 94.22, "elapsed_time": "0:42:28", "remaining_time": "0:02:36", "throughput": 2503.77, "total_tokens": 6382080}
|
| 3132 |
+
{"current_steps": 15570, "total_steps": 16520, "loss": 0.1085, "lr": 5.030415176201093e-07, "epoch": 18.849878934624698, "percentage": 94.25, "elapsed_time": "0:42:29", "remaining_time": "0:02:35", "throughput": 2503.8, "total_tokens": 6384288}
|
| 3133 |
+
{"current_steps": 15575, "total_steps": 16520, "loss": 0.1129, "lr": 4.977833953342615e-07, "epoch": 18.85593220338983, "percentage": 94.28, "elapsed_time": "0:42:30", "remaining_time": "0:02:34", "throughput": 2503.82, "total_tokens": 6386304}
|
| 3134 |
+
{"current_steps": 15580, "total_steps": 16520, "loss": 0.139, "lr": 4.925526219559912e-07, "epoch": 18.861985472154963, "percentage": 94.31, "elapsed_time": "0:42:31", "remaining_time": "0:02:33", "throughput": 2503.89, "total_tokens": 6388544}
|
| 3135 |
+
{"current_steps": 15585, "total_steps": 16520, "loss": 0.0162, "lr": 4.873492033237864e-07, "epoch": 18.868038740920095, "percentage": 94.34, "elapsed_time": "0:42:32", "remaining_time": "0:02:33", "throughput": 2503.9, "total_tokens": 6390528}
|
| 3136 |
+
{"current_steps": 15590, "total_steps": 16520, "loss": 0.0686, "lr": 4.821731452456125e-07, "epoch": 18.87409200968523, "percentage": 94.37, "elapsed_time": "0:42:33", "remaining_time": "0:02:32", "throughput": 2503.95, "total_tokens": 6392608}
|
| 3137 |
+
{"current_steps": 15595, "total_steps": 16520, "loss": 0.0786, "lr": 4.770244534988754e-07, "epoch": 18.880145278450364, "percentage": 94.4, "elapsed_time": "0:42:33", "remaining_time": "0:02:31", "throughput": 2504.0, "total_tokens": 6394784}
|
| 3138 |
+
{"current_steps": 15600, "total_steps": 16520, "loss": 0.1351, "lr": 4.7190313383045637e-07, "epoch": 18.886198547215496, "percentage": 94.43, "elapsed_time": "0:42:34", "remaining_time": "0:02:30", "throughput": 2504.05, "total_tokens": 6396800}
|
| 3139 |
+
{"current_steps": 15605, "total_steps": 16520, "loss": 0.1016, "lr": 4.6680919195667137e-07, "epoch": 18.89225181598063, "percentage": 94.46, "elapsed_time": "0:42:35", "remaining_time": "0:02:29", "throughput": 2504.09, "total_tokens": 6398688}
|
| 3140 |
+
{"current_steps": 15610, "total_steps": 16520, "loss": 0.0627, "lr": 4.6174263356328075e-07, "epoch": 18.89830508474576, "percentage": 94.49, "elapsed_time": "0:42:36", "remaining_time": "0:02:29", "throughput": 2504.13, "total_tokens": 6400736}
|
| 3141 |
+
{"current_steps": 15615, "total_steps": 16520, "loss": 0.1248, "lr": 4.567034643054802e-07, "epoch": 18.904358353510897, "percentage": 94.52, "elapsed_time": "0:42:36", "remaining_time": "0:02:28", "throughput": 2504.18, "total_tokens": 6402720}
|
| 3142 |
+
{"current_steps": 15620, "total_steps": 16520, "loss": 0.1576, "lr": 4.5169168980789545e-07, "epoch": 18.91041162227603, "percentage": 94.55, "elapsed_time": "0:42:37", "remaining_time": "0:02:27", "throughput": 2504.21, "total_tokens": 6404832}
|
| 3143 |
+
{"current_steps": 15625, "total_steps": 16520, "loss": 0.0326, "lr": 4.4670731566457126e-07, "epoch": 18.916464891041162, "percentage": 94.58, "elapsed_time": "0:42:38", "remaining_time": "0:02:26", "throughput": 2504.22, "total_tokens": 6406816}
|
| 3144 |
+
{"current_steps": 15630, "total_steps": 16520, "loss": 0.0862, "lr": 4.4175034743897947e-07, "epoch": 18.922518159806295, "percentage": 94.61, "elapsed_time": "0:42:39", "remaining_time": "0:02:25", "throughput": 2504.21, "total_tokens": 6408832}
|
| 3145 |
+
{"current_steps": 15635, "total_steps": 16520, "loss": 0.0548, "lr": 4.368207906639804e-07, "epoch": 18.928571428571427, "percentage": 94.64, "elapsed_time": "0:42:39", "remaining_time": "0:02:24", "throughput": 2504.24, "total_tokens": 6410848}
|
| 3146 |
+
{"current_steps": 15640, "total_steps": 16520, "loss": 0.1499, "lr": 4.319186508418671e-07, "epoch": 18.934624697336563, "percentage": 94.67, "elapsed_time": "0:42:40", "remaining_time": "0:02:24", "throughput": 2504.29, "total_tokens": 6413024}
|
| 3147 |
+
{"current_steps": 15645, "total_steps": 16520, "loss": 0.0994, "lr": 4.270439334442988e-07, "epoch": 18.940677966101696, "percentage": 94.7, "elapsed_time": "0:42:41", "remaining_time": "0:02:23", "throughput": 2504.34, "total_tokens": 6415040}
|
| 3148 |
+
{"current_steps": 15650, "total_steps": 16520, "loss": 0.0711, "lr": 4.221966439123509e-07, "epoch": 18.946731234866828, "percentage": 94.73, "elapsed_time": "0:42:42", "remaining_time": "0:02:22", "throughput": 2504.36, "total_tokens": 6417216}
|
| 3149 |
+
{"current_steps": 15655, "total_steps": 16520, "loss": 0.0636, "lr": 4.173767876564788e-07, "epoch": 18.95278450363196, "percentage": 94.76, "elapsed_time": "0:42:43", "remaining_time": "0:02:21", "throughput": 2504.44, "total_tokens": 6419200}
|
| 3150 |
+
{"current_steps": 15660, "total_steps": 16520, "loss": 0.1007, "lr": 4.1258437005650687e-07, "epoch": 18.958837772397093, "percentage": 94.79, "elapsed_time": "0:42:43", "remaining_time": "0:02:20", "throughput": 2504.44, "total_tokens": 6421152}
|
| 3151 |
+
{"current_steps": 15665, "total_steps": 16520, "loss": 0.1656, "lr": 4.0781939646164226e-07, "epoch": 18.96489104116223, "percentage": 94.82, "elapsed_time": "0:42:44", "remaining_time": "0:02:19", "throughput": 2504.45, "total_tokens": 6423200}
|
| 3152 |
+
{"current_steps": 15670, "total_steps": 16520, "loss": 0.1192, "lr": 4.030818721904611e-07, "epoch": 18.97094430992736, "percentage": 94.85, "elapsed_time": "0:42:45", "remaining_time": "0:02:19", "throughput": 2504.47, "total_tokens": 6425216}
|
| 3153 |
+
{"current_steps": 15675, "total_steps": 16520, "loss": 0.0817, "lr": 3.983718025308947e-07, "epoch": 18.976997578692494, "percentage": 94.88, "elapsed_time": "0:42:46", "remaining_time": "0:02:18", "throughput": 2504.49, "total_tokens": 6427200}
|
| 3154 |
+
{"current_steps": 15680, "total_steps": 16520, "loss": 0.1085, "lr": 3.9368919274023475e-07, "epoch": 18.983050847457626, "percentage": 94.92, "elapsed_time": "0:42:47", "remaining_time": "0:02:17", "throughput": 2504.51, "total_tokens": 6429216}
|
| 3155 |
+
{"current_steps": 15685, "total_steps": 16520, "loss": 0.09, "lr": 3.890340480451199e-07, "epoch": 18.98910411622276, "percentage": 94.95, "elapsed_time": "0:42:47", "remaining_time": "0:02:16", "throughput": 2504.58, "total_tokens": 6431168}
|
| 3156 |
+
{"current_steps": 15690, "total_steps": 16520, "loss": 0.1607, "lr": 3.8440637364153265e-07, "epoch": 18.995157384987895, "percentage": 94.98, "elapsed_time": "0:42:48", "remaining_time": "0:02:15", "throughput": 2504.59, "total_tokens": 6433152}
|
| 3157 |
+
{"current_steps": 15694, "total_steps": 16520, "eval_loss": 0.14278624951839447, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:42:53", "remaining_time": "0:02:15", "throughput": 2499.89, "total_tokens": 6434448}
|
| 3158 |
+
{"current_steps": 15695, "total_steps": 16520, "loss": 0.0971, "lr": 3.7980617469479953e-07, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:42:54", "remaining_time": "0:02:15", "throughput": 2499.06, "total_tokens": 6434832}
|
| 3159 |
+
{"current_steps": 15700, "total_steps": 16520, "loss": 0.1391, "lr": 3.7523345633957153e-07, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:42:55", "remaining_time": "0:02:14", "throughput": 2498.98, "total_tokens": 6436976}
|
| 3160 |
+
{"current_steps": 15705, "total_steps": 16520, "loss": 0.0693, "lr": 3.706882236798298e-07, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:42:56", "remaining_time": "0:02:13", "throughput": 2499.03, "total_tokens": 6438896}
|