Training in progress, step 17172
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +190 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58745928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c9b754b8ba5a39ca16af4c99ab64cc8b0e1d1e981c5a4366cfc61eac2d2b06c
|
| 3 |
size 58745928
|
trainer_log.jsonl
CHANGED
|
@@ -3264,3 +3264,193 @@
|
|
| 3264 |
{"current_steps": 16235, "total_steps": 19080, "loss": 0.191, "lr": 3.312868001063654e-06, "epoch": 8.508909853249476, "percentage": 85.09, "elapsed_time": "1:15:16", "remaining_time": "0:13:11", "throughput": 2350.39, "total_tokens": 10614928}
|
| 3265 |
{"current_steps": 16240, "total_steps": 19080, "loss": 0.2706, "lr": 3.3015008120668072e-06, "epoch": 8.51153039832285, "percentage": 85.12, "elapsed_time": "1:15:17", "remaining_time": "0:13:10", "throughput": 2350.45, "total_tokens": 10618320}
|
| 3266 |
{"current_steps": 16245, "total_steps": 19080, "loss": 0.2456, "lr": 3.290151779382922e-06, "epoch": 8.514150943396226, "percentage": 85.14, "elapsed_time": "1:15:18", "remaining_time": "0:13:08", "throughput": 2350.49, "total_tokens": 10621296}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3264 |
{"current_steps": 16235, "total_steps": 19080, "loss": 0.191, "lr": 3.312868001063654e-06, "epoch": 8.508909853249476, "percentage": 85.09, "elapsed_time": "1:15:16", "remaining_time": "0:13:11", "throughput": 2350.39, "total_tokens": 10614928}
|
| 3265 |
{"current_steps": 16240, "total_steps": 19080, "loss": 0.2706, "lr": 3.3015008120668072e-06, "epoch": 8.51153039832285, "percentage": 85.12, "elapsed_time": "1:15:17", "remaining_time": "0:13:10", "throughput": 2350.45, "total_tokens": 10618320}
|
| 3266 |
{"current_steps": 16245, "total_steps": 19080, "loss": 0.2456, "lr": 3.290151779382922e-06, "epoch": 8.514150943396226, "percentage": 85.14, "elapsed_time": "1:15:18", "remaining_time": "0:13:08", "throughput": 2350.49, "total_tokens": 10621296}
|
| 3267 |
+
{"current_steps": 16250, "total_steps": 19080, "loss": 0.2513, "lr": 3.2788209125083654e-06, "epoch": 8.516771488469601, "percentage": 85.17, "elapsed_time": "1:15:19", "remaining_time": "0:13:07", "throughput": 2350.48, "total_tokens": 10623984}
|
| 3268 |
+
{"current_steps": 16255, "total_steps": 19080, "loss": 0.1983, "lr": 3.267508220924287e-06, "epoch": 8.519392033542976, "percentage": 85.19, "elapsed_time": "1:15:21", "remaining_time": "0:13:05", "throughput": 2350.58, "total_tokens": 10628560}
|
| 3269 |
+
{"current_steps": 16260, "total_steps": 19080, "loss": 0.2134, "lr": 3.256213714096623e-06, "epoch": 8.522012578616351, "percentage": 85.22, "elapsed_time": "1:15:22", "remaining_time": "0:13:04", "throughput": 2350.65, "total_tokens": 10631856}
|
| 3270 |
+
{"current_steps": 16265, "total_steps": 19080, "loss": 0.2104, "lr": 3.2449374014761114e-06, "epoch": 8.524633123689728, "percentage": 85.25, "elapsed_time": "1:15:24", "remaining_time": "0:13:03", "throughput": 2350.72, "total_tokens": 10635696}
|
| 3271 |
+
{"current_steps": 16270, "total_steps": 19080, "loss": 0.2086, "lr": 3.2336792924982514e-06, "epoch": 8.527253668763104, "percentage": 85.27, "elapsed_time": "1:15:25", "remaining_time": "0:13:01", "throughput": 2350.75, "total_tokens": 10638384}
|
| 3272 |
+
{"current_steps": 16275, "total_steps": 19080, "loss": 0.256, "lr": 3.222439396583307e-06, "epoch": 8.529874213836479, "percentage": 85.3, "elapsed_time": "1:15:26", "remaining_time": "0:13:00", "throughput": 2350.78, "total_tokens": 10641456}
|
| 3273 |
+
{"current_steps": 16280, "total_steps": 19080, "loss": 0.1851, "lr": 3.2112177231363226e-06, "epoch": 8.532494758909854, "percentage": 85.32, "elapsed_time": "1:15:27", "remaining_time": "0:12:58", "throughput": 2350.8, "total_tokens": 10644336}
|
| 3274 |
+
{"current_steps": 16285, "total_steps": 19080, "loss": 0.188, "lr": 3.2000142815470756e-06, "epoch": 8.535115303983229, "percentage": 85.35, "elapsed_time": "1:15:29", "remaining_time": "0:12:57", "throughput": 2350.85, "total_tokens": 10647440}
|
| 3275 |
+
{"current_steps": 16290, "total_steps": 19080, "loss": 0.2078, "lr": 3.188829081190095e-06, "epoch": 8.537735849056604, "percentage": 85.38, "elapsed_time": "1:15:30", "remaining_time": "0:12:55", "throughput": 2350.84, "total_tokens": 10649872}
|
| 3276 |
+
{"current_steps": 16295, "total_steps": 19080, "loss": 0.2115, "lr": 3.1776621314246384e-06, "epoch": 8.54035639412998, "percentage": 85.4, "elapsed_time": "1:15:31", "remaining_time": "0:12:54", "throughput": 2350.88, "total_tokens": 10652976}
|
| 3277 |
+
{"current_steps": 16300, "total_steps": 19080, "loss": 0.1562, "lr": 3.1665134415947125e-06, "epoch": 8.542976939203355, "percentage": 85.43, "elapsed_time": "1:15:32", "remaining_time": "0:12:53", "throughput": 2350.9, "total_tokens": 10656016}
|
| 3278 |
+
{"current_steps": 16305, "total_steps": 19080, "loss": 0.3158, "lr": 3.1553830210290236e-06, "epoch": 8.54559748427673, "percentage": 85.46, "elapsed_time": "1:15:34", "remaining_time": "0:12:51", "throughput": 2350.98, "total_tokens": 10659536}
|
| 3279 |
+
{"current_steps": 16310, "total_steps": 19080, "loss": 0.1693, "lr": 3.1442708790410002e-06, "epoch": 8.548218029350105, "percentage": 85.48, "elapsed_time": "1:15:35", "remaining_time": "0:12:50", "throughput": 2351.02, "total_tokens": 10662416}
|
| 3280 |
+
{"current_steps": 16315, "total_steps": 19080, "loss": 0.1778, "lr": 3.133177024928771e-06, "epoch": 8.55083857442348, "percentage": 85.51, "elapsed_time": "1:15:36", "remaining_time": "0:12:48", "throughput": 2351.13, "total_tokens": 10666320}
|
| 3281 |
+
{"current_steps": 16320, "total_steps": 19080, "loss": 0.2989, "lr": 3.1221014679751777e-06, "epoch": 8.553459119496855, "percentage": 85.53, "elapsed_time": "1:15:37", "remaining_time": "0:12:47", "throughput": 2351.16, "total_tokens": 10669136}
|
| 3282 |
+
{"current_steps": 16325, "total_steps": 19080, "loss": 0.1971, "lr": 3.111044217447731e-06, "epoch": 8.55607966457023, "percentage": 85.56, "elapsed_time": "1:15:39", "remaining_time": "0:12:46", "throughput": 2351.18, "total_tokens": 10672240}
|
| 3283 |
+
{"current_steps": 16330, "total_steps": 19080, "loss": 0.2158, "lr": 3.1000052825986366e-06, "epoch": 8.558700209643606, "percentage": 85.59, "elapsed_time": "1:15:40", "remaining_time": "0:12:44", "throughput": 2351.21, "total_tokens": 10674992}
|
| 3284 |
+
{"current_steps": 16335, "total_steps": 19080, "loss": 0.2185, "lr": 3.0889846726647657e-06, "epoch": 8.56132075471698, "percentage": 85.61, "elapsed_time": "1:15:41", "remaining_time": "0:12:43", "throughput": 2351.21, "total_tokens": 10677616}
|
| 3285 |
+
{"current_steps": 16340, "total_steps": 19080, "loss": 0.1976, "lr": 3.077982396867668e-06, "epoch": 8.563941299790356, "percentage": 85.64, "elapsed_time": "1:15:42", "remaining_time": "0:12:41", "throughput": 2351.29, "total_tokens": 10681136}
|
| 3286 |
+
{"current_steps": 16345, "total_steps": 19080, "loss": 0.2977, "lr": 3.066998464413545e-06, "epoch": 8.566561844863731, "percentage": 85.67, "elapsed_time": "1:15:43", "remaining_time": "0:12:40", "throughput": 2351.35, "total_tokens": 10684368}
|
| 3287 |
+
{"current_steps": 16350, "total_steps": 19080, "loss": 0.2747, "lr": 3.056032884493243e-06, "epoch": 8.569182389937106, "percentage": 85.69, "elapsed_time": "1:15:45", "remaining_time": "0:12:38", "throughput": 2351.38, "total_tokens": 10687632}
|
| 3288 |
+
{"current_steps": 16355, "total_steps": 19080, "loss": 0.2258, "lr": 3.045085666282266e-06, "epoch": 8.571802935010481, "percentage": 85.72, "elapsed_time": "1:15:47", "remaining_time": "0:12:37", "throughput": 2351.49, "total_tokens": 10692336}
|
| 3289 |
+
{"current_steps": 16360, "total_steps": 19080, "loss": 0.2219, "lr": 3.034156818940745e-06, "epoch": 8.574423480083858, "percentage": 85.74, "elapsed_time": "1:15:48", "remaining_time": "0:12:36", "throughput": 2351.59, "total_tokens": 10696048}
|
| 3290 |
+
{"current_steps": 16365, "total_steps": 19080, "loss": 0.1744, "lr": 3.0232463516134317e-06, "epoch": 8.577044025157234, "percentage": 85.77, "elapsed_time": "1:15:50", "remaining_time": "0:12:34", "throughput": 2351.66, "total_tokens": 10700304}
|
| 3291 |
+
{"current_steps": 16370, "total_steps": 19080, "loss": 0.1773, "lr": 3.0123542734297267e-06, "epoch": 8.579664570230609, "percentage": 85.8, "elapsed_time": "1:15:51", "remaining_time": "0:12:33", "throughput": 2351.66, "total_tokens": 10703120}
|
| 3292 |
+
{"current_steps": 16375, "total_steps": 19080, "loss": 0.2062, "lr": 3.0014805935035973e-06, "epoch": 8.582285115303984, "percentage": 85.82, "elapsed_time": "1:15:52", "remaining_time": "0:12:32", "throughput": 2351.8, "total_tokens": 10707536}
|
| 3293 |
+
{"current_steps": 16380, "total_steps": 19080, "loss": 0.2787, "lr": 2.99062532093366e-06, "epoch": 8.584905660377359, "percentage": 85.85, "elapsed_time": "1:15:54", "remaining_time": "0:12:30", "throughput": 2351.83, "total_tokens": 10710384}
|
| 3294 |
+
{"current_steps": 16385, "total_steps": 19080, "loss": 0.1888, "lr": 2.979788464803107e-06, "epoch": 8.587526205450734, "percentage": 85.88, "elapsed_time": "1:15:55", "remaining_time": "0:12:29", "throughput": 2351.91, "total_tokens": 10713840}
|
| 3295 |
+
{"current_steps": 16390, "total_steps": 19080, "loss": 0.1922, "lr": 2.968970034179719e-06, "epoch": 8.59014675052411, "percentage": 85.9, "elapsed_time": "1:15:56", "remaining_time": "0:12:27", "throughput": 2351.94, "total_tokens": 10716720}
|
| 3296 |
+
{"current_steps": 16395, "total_steps": 19080, "loss": 0.23, "lr": 2.9581700381158735e-06, "epoch": 8.592767295597485, "percentage": 85.93, "elapsed_time": "1:15:58", "remaining_time": "0:12:26", "throughput": 2352.02, "total_tokens": 10720720}
|
| 3297 |
+
{"current_steps": 16400, "total_steps": 19080, "loss": 0.1848, "lr": 2.9473884856485113e-06, "epoch": 8.59538784067086, "percentage": 85.95, "elapsed_time": "1:15:59", "remaining_time": "0:12:25", "throughput": 2352.06, "total_tokens": 10723952}
|
| 3298 |
+
{"current_steps": 16405, "total_steps": 19080, "loss": 0.2939, "lr": 2.936625385799133e-06, "epoch": 8.598008385744235, "percentage": 85.98, "elapsed_time": "1:16:00", "remaining_time": "0:12:23", "throughput": 2351.99, "total_tokens": 10726096}
|
| 3299 |
+
{"current_steps": 16410, "total_steps": 19080, "loss": 0.1837, "lr": 2.925880747573831e-06, "epoch": 8.60062893081761, "percentage": 86.01, "elapsed_time": "1:16:01", "remaining_time": "0:12:22", "throughput": 2352.05, "total_tokens": 10729456}
|
| 3300 |
+
{"current_steps": 16415, "total_steps": 19080, "loss": 0.186, "lr": 2.9151545799632003e-06, "epoch": 8.603249475890985, "percentage": 86.03, "elapsed_time": "1:16:02", "remaining_time": "0:12:20", "throughput": 2352.04, "total_tokens": 10731824}
|
| 3301 |
+
{"current_steps": 16420, "total_steps": 19080, "loss": 0.1916, "lr": 2.9044468919424305e-06, "epoch": 8.60587002096436, "percentage": 86.06, "elapsed_time": "1:16:03", "remaining_time": "0:12:19", "throughput": 2352.09, "total_tokens": 10734864}
|
| 3302 |
+
{"current_steps": 16425, "total_steps": 19080, "loss": 0.1915, "lr": 2.8937576924712133e-06, "epoch": 8.608490566037736, "percentage": 86.08, "elapsed_time": "1:16:05", "remaining_time": "0:12:17", "throughput": 2352.09, "total_tokens": 10737744}
|
| 3303 |
+
{"current_steps": 16430, "total_steps": 19080, "loss": 0.1458, "lr": 2.883086990493783e-06, "epoch": 8.61111111111111, "percentage": 86.11, "elapsed_time": "1:16:06", "remaining_time": "0:12:16", "throughput": 2352.13, "total_tokens": 10740656}
|
| 3304 |
+
{"current_steps": 16435, "total_steps": 19080, "loss": 0.3675, "lr": 2.872434794938905e-06, "epoch": 8.613731656184486, "percentage": 86.14, "elapsed_time": "1:16:07", "remaining_time": "0:12:15", "throughput": 2352.23, "total_tokens": 10744688}
|
| 3305 |
+
{"current_steps": 16440, "total_steps": 19080, "loss": 0.2388, "lr": 2.861801114719842e-06, "epoch": 8.616352201257861, "percentage": 86.16, "elapsed_time": "1:16:08", "remaining_time": "0:12:13", "throughput": 2352.22, "total_tokens": 10747184}
|
| 3306 |
+
{"current_steps": 16445, "total_steps": 19080, "loss": 0.2486, "lr": 2.8511859587343704e-06, "epoch": 8.618972746331236, "percentage": 86.19, "elapsed_time": "1:16:10", "remaining_time": "0:12:12", "throughput": 2352.25, "total_tokens": 10750320}
|
| 3307 |
+
{"current_steps": 16450, "total_steps": 19080, "loss": 0.1614, "lr": 2.840589335864774e-06, "epoch": 8.621593291404611, "percentage": 86.22, "elapsed_time": "1:16:11", "remaining_time": "0:12:10", "throughput": 2352.3, "total_tokens": 10753328}
|
| 3308 |
+
{"current_steps": 16455, "total_steps": 19080, "loss": 0.2083, "lr": 2.830011254977821e-06, "epoch": 8.624213836477988, "percentage": 86.24, "elapsed_time": "1:16:12", "remaining_time": "0:12:09", "throughput": 2352.32, "total_tokens": 10756208}
|
| 3309 |
+
{"current_steps": 16460, "total_steps": 19080, "loss": 0.203, "lr": 2.819451724924768e-06, "epoch": 8.626834381551364, "percentage": 86.27, "elapsed_time": "1:16:14", "remaining_time": "0:12:08", "throughput": 2352.4, "total_tokens": 10759888}
|
| 3310 |
+
{"current_steps": 16465, "total_steps": 19080, "loss": 0.1741, "lr": 2.8089107545413355e-06, "epoch": 8.629454926624739, "percentage": 86.29, "elapsed_time": "1:16:15", "remaining_time": "0:12:06", "throughput": 2352.46, "total_tokens": 10763152}
|
| 3311 |
+
{"current_steps": 16470, "total_steps": 19080, "loss": 0.2372, "lr": 2.7983883526477433e-06, "epoch": 8.632075471698114, "percentage": 86.32, "elapsed_time": "1:16:16", "remaining_time": "0:12:05", "throughput": 2352.57, "total_tokens": 10767024}
|
| 3312 |
+
{"current_steps": 16475, "total_steps": 19080, "loss": 0.1842, "lr": 2.7878845280486453e-06, "epoch": 8.634696016771489, "percentage": 86.35, "elapsed_time": "1:16:17", "remaining_time": "0:12:03", "throughput": 2352.61, "total_tokens": 10770096}
|
| 3313 |
+
{"current_steps": 16480, "total_steps": 19080, "loss": 0.2215, "lr": 2.777399289533164e-06, "epoch": 8.637316561844864, "percentage": 86.37, "elapsed_time": "1:16:19", "remaining_time": "0:12:02", "throughput": 2352.75, "total_tokens": 10774576}
|
| 3314 |
+
{"current_steps": 16485, "total_steps": 19080, "loss": 0.1895, "lr": 2.766932645874873e-06, "epoch": 8.63993710691824, "percentage": 86.4, "elapsed_time": "1:16:20", "remaining_time": "0:12:01", "throughput": 2352.75, "total_tokens": 10777296}
|
| 3315 |
+
{"current_steps": 16490, "total_steps": 19080, "loss": 0.2062, "lr": 2.756484605831777e-06, "epoch": 8.642557651991615, "percentage": 86.43, "elapsed_time": "1:16:21", "remaining_time": "0:11:59", "throughput": 2352.74, "total_tokens": 10779696}
|
| 3316 |
+
{"current_steps": 16495, "total_steps": 19080, "loss": 0.2209, "lr": 2.74605517814632e-06, "epoch": 8.64517819706499, "percentage": 86.45, "elapsed_time": "1:16:23", "remaining_time": "0:11:58", "throughput": 2352.81, "total_tokens": 10783184}
|
| 3317 |
+
{"current_steps": 16500, "total_steps": 19080, "loss": 0.3353, "lr": 2.7356443715453705e-06, "epoch": 8.647798742138365, "percentage": 86.48, "elapsed_time": "1:16:24", "remaining_time": "0:11:56", "throughput": 2352.86, "total_tokens": 10786512}
|
| 3318 |
+
{"current_steps": 16505, "total_steps": 19080, "loss": 0.1798, "lr": 2.725252194740213e-06, "epoch": 8.65041928721174, "percentage": 86.5, "elapsed_time": "1:16:25", "remaining_time": "0:11:55", "throughput": 2352.92, "total_tokens": 10789872}
|
| 3319 |
+
{"current_steps": 16510, "total_steps": 19080, "loss": 0.1893, "lr": 2.714878656426553e-06, "epoch": 8.653039832285115, "percentage": 86.53, "elapsed_time": "1:16:26", "remaining_time": "0:11:54", "throughput": 2352.96, "total_tokens": 10792944}
|
| 3320 |
+
{"current_steps": 16515, "total_steps": 19080, "loss": 0.2053, "lr": 2.704523765284489e-06, "epoch": 8.65566037735849, "percentage": 86.56, "elapsed_time": "1:16:28", "remaining_time": "0:11:52", "throughput": 2353.04, "total_tokens": 10796624}
|
| 3321 |
+
{"current_steps": 16520, "total_steps": 19080, "loss": 0.2192, "lr": 2.6941875299785174e-06, "epoch": 8.658280922431866, "percentage": 86.58, "elapsed_time": "1:16:29", "remaining_time": "0:11:51", "throughput": 2353.09, "total_tokens": 10800144}
|
| 3322 |
+
{"current_steps": 16525, "total_steps": 19080, "loss": 0.1803, "lr": 2.683869959157534e-06, "epoch": 8.66090146750524, "percentage": 86.61, "elapsed_time": "1:16:31", "remaining_time": "0:11:49", "throughput": 2353.11, "total_tokens": 10803152}
|
| 3323 |
+
{"current_steps": 16530, "total_steps": 19080, "loss": 0.2963, "lr": 2.673571061454813e-06, "epoch": 8.663522012578616, "percentage": 86.64, "elapsed_time": "1:16:32", "remaining_time": "0:11:48", "throughput": 2353.09, "total_tokens": 10805552}
|
| 3324 |
+
{"current_steps": 16535, "total_steps": 19080, "loss": 0.2044, "lr": 2.6632908454879898e-06, "epoch": 8.666142557651991, "percentage": 86.66, "elapsed_time": "1:16:33", "remaining_time": "0:11:47", "throughput": 2353.12, "total_tokens": 10808880}
|
| 3325 |
+
{"current_steps": 16540, "total_steps": 19080, "loss": 0.2055, "lr": 2.653029319859096e-06, "epoch": 8.668763102725366, "percentage": 86.69, "elapsed_time": "1:16:34", "remaining_time": "0:11:45", "throughput": 2353.17, "total_tokens": 10812240}
|
| 3326 |
+
{"current_steps": 16545, "total_steps": 19080, "loss": 0.1915, "lr": 2.642786493154492e-06, "epoch": 8.671383647798741, "percentage": 86.71, "elapsed_time": "1:16:36", "remaining_time": "0:11:44", "throughput": 2353.22, "total_tokens": 10815600}
|
| 3327 |
+
{"current_steps": 16550, "total_steps": 19080, "loss": 0.1996, "lr": 2.6325623739449108e-06, "epoch": 8.674004192872118, "percentage": 86.74, "elapsed_time": "1:16:37", "remaining_time": "0:11:42", "throughput": 2353.3, "total_tokens": 10819120}
|
| 3328 |
+
{"current_steps": 16555, "total_steps": 19080, "loss": 0.1676, "lr": 2.6223569707854444e-06, "epoch": 8.676624737945493, "percentage": 86.77, "elapsed_time": "1:16:38", "remaining_time": "0:11:41", "throughput": 2353.27, "total_tokens": 10821744}
|
| 3329 |
+
{"current_steps": 16560, "total_steps": 19080, "loss": 0.2162, "lr": 2.612170292215482e-06, "epoch": 8.679245283018869, "percentage": 86.79, "elapsed_time": "1:16:39", "remaining_time": "0:11:39", "throughput": 2353.26, "total_tokens": 10824336}
|
| 3330 |
+
{"current_steps": 16565, "total_steps": 19080, "loss": 0.2595, "lr": 2.6020023467587917e-06, "epoch": 8.681865828092244, "percentage": 86.82, "elapsed_time": "1:16:40", "remaining_time": "0:11:38", "throughput": 2353.23, "total_tokens": 10826672}
|
| 3331 |
+
{"current_steps": 16570, "total_steps": 19080, "loss": 0.18, "lr": 2.5918531429234368e-06, "epoch": 8.684486373165619, "percentage": 86.84, "elapsed_time": "1:16:42", "remaining_time": "0:11:37", "throughput": 2353.3, "total_tokens": 10830192}
|
| 3332 |
+
{"current_steps": 16575, "total_steps": 19080, "loss": 0.3276, "lr": 2.5817226892018016e-06, "epoch": 8.687106918238994, "percentage": 86.87, "elapsed_time": "1:16:43", "remaining_time": "0:11:35", "throughput": 2353.37, "total_tokens": 10834096}
|
| 3333 |
+
{"current_steps": 16580, "total_steps": 19080, "loss": 0.2142, "lr": 2.571610994070603e-06, "epoch": 8.68972746331237, "percentage": 86.9, "elapsed_time": "1:16:44", "remaining_time": "0:11:34", "throughput": 2353.42, "total_tokens": 10837360}
|
| 3334 |
+
{"current_steps": 16585, "total_steps": 19080, "loss": 0.3037, "lr": 2.561518065990834e-06, "epoch": 8.692348008385745, "percentage": 86.92, "elapsed_time": "1:16:46", "remaining_time": "0:11:32", "throughput": 2353.53, "total_tokens": 10841168}
|
| 3335 |
+
{"current_steps": 16590, "total_steps": 19080, "loss": 0.1603, "lr": 2.5514439134077945e-06, "epoch": 8.69496855345912, "percentage": 86.95, "elapsed_time": "1:16:47", "remaining_time": "0:11:31", "throughput": 2353.61, "total_tokens": 10844784}
|
| 3336 |
+
{"current_steps": 16595, "total_steps": 19080, "loss": 0.2142, "lr": 2.541388544751089e-06, "epoch": 8.697589098532495, "percentage": 86.98, "elapsed_time": "1:16:48", "remaining_time": "0:11:30", "throughput": 2353.62, "total_tokens": 10847376}
|
| 3337 |
+
{"current_steps": 16600, "total_steps": 19080, "loss": 0.174, "lr": 2.53135196843457e-06, "epoch": 8.70020964360587, "percentage": 87.0, "elapsed_time": "1:16:49", "remaining_time": "0:11:28", "throughput": 2353.6, "total_tokens": 10850000}
|
| 3338 |
+
{"current_steps": 16605, "total_steps": 19080, "loss": 0.1484, "lr": 2.521334192856403e-06, "epoch": 8.702830188679245, "percentage": 87.03, "elapsed_time": "1:16:51", "remaining_time": "0:11:27", "throughput": 2353.6, "total_tokens": 10852528}
|
| 3339 |
+
{"current_steps": 16610, "total_steps": 19080, "loss": 0.1675, "lr": 2.5113352263990005e-06, "epoch": 8.70545073375262, "percentage": 87.05, "elapsed_time": "1:16:52", "remaining_time": "0:11:25", "throughput": 2353.6, "total_tokens": 10855312}
|
| 3340 |
+
{"current_steps": 16615, "total_steps": 19080, "loss": 0.3575, "lr": 2.5013550774290322e-06, "epoch": 8.708071278825996, "percentage": 87.08, "elapsed_time": "1:16:53", "remaining_time": "0:11:24", "throughput": 2353.71, "total_tokens": 10859376}
|
| 3341 |
+
{"current_steps": 16620, "total_steps": 19080, "loss": 0.227, "lr": 2.491393754297444e-06, "epoch": 8.71069182389937, "percentage": 87.11, "elapsed_time": "1:16:54", "remaining_time": "0:11:23", "throughput": 2353.74, "total_tokens": 10862224}
|
| 3342 |
+
{"current_steps": 16625, "total_steps": 19080, "loss": 0.2434, "lr": 2.48145126533941e-06, "epoch": 8.713312368972746, "percentage": 87.13, "elapsed_time": "1:16:56", "remaining_time": "0:11:21", "throughput": 2353.76, "total_tokens": 10865104}
|
| 3343 |
+
{"current_steps": 16630, "total_steps": 19080, "loss": 0.1776, "lr": 2.4715276188743476e-06, "epoch": 8.715932914046121, "percentage": 87.16, "elapsed_time": "1:16:57", "remaining_time": "0:11:20", "throughput": 2353.77, "total_tokens": 10867792}
|
| 3344 |
+
{"current_steps": 16635, "total_steps": 19080, "loss": 0.2068, "lr": 2.461622823205917e-06, "epoch": 8.718553459119496, "percentage": 87.19, "elapsed_time": "1:16:58", "remaining_time": "0:11:18", "throughput": 2353.8, "total_tokens": 10870768}
|
| 3345 |
+
{"current_steps": 16640, "total_steps": 19080, "loss": 0.133, "lr": 2.451736886621997e-06, "epoch": 8.721174004192871, "percentage": 87.21, "elapsed_time": "1:16:59", "remaining_time": "0:11:17", "throughput": 2353.91, "total_tokens": 10875056}
|
| 3346 |
+
{"current_steps": 16645, "total_steps": 19080, "loss": 0.2429, "lr": 2.4418698173946872e-06, "epoch": 8.723794549266248, "percentage": 87.24, "elapsed_time": "1:17:01", "remaining_time": "0:11:16", "throughput": 2353.95, "total_tokens": 10878288}
|
| 3347 |
+
{"current_steps": 16650, "total_steps": 19080, "loss": 0.2505, "lr": 2.432021623780295e-06, "epoch": 8.726415094339622, "percentage": 87.26, "elapsed_time": "1:17:02", "remaining_time": "0:11:14", "throughput": 2353.93, "total_tokens": 10880720}
|
| 3348 |
+
{"current_steps": 16655, "total_steps": 19080, "loss": 0.1294, "lr": 2.4221923140193477e-06, "epoch": 8.729035639412999, "percentage": 87.29, "elapsed_time": "1:17:03", "remaining_time": "0:11:13", "throughput": 2354.01, "total_tokens": 10884528}
|
| 3349 |
+
{"current_steps": 16660, "total_steps": 19080, "loss": 0.2665, "lr": 2.41238189633656e-06, "epoch": 8.731656184486374, "percentage": 87.32, "elapsed_time": "1:17:04", "remaining_time": "0:11:11", "throughput": 2354.0, "total_tokens": 10886960}
|
| 3350 |
+
{"current_steps": 16665, "total_steps": 19080, "loss": 0.2221, "lr": 2.402590378940836e-06, "epoch": 8.734276729559749, "percentage": 87.34, "elapsed_time": "1:17:06", "remaining_time": "0:11:10", "throughput": 2354.02, "total_tokens": 10889904}
|
| 3351 |
+
{"current_steps": 16670, "total_steps": 19080, "loss": 0.2701, "lr": 2.3928177700252798e-06, "epoch": 8.736897274633124, "percentage": 87.37, "elapsed_time": "1:17:07", "remaining_time": "0:11:08", "throughput": 2354.07, "total_tokens": 10893072}
|
| 3352 |
+
{"current_steps": 16675, "total_steps": 19080, "loss": 0.2631, "lr": 2.3830640777671583e-06, "epoch": 8.7395178197065, "percentage": 87.4, "elapsed_time": "1:17:08", "remaining_time": "0:11:07", "throughput": 2354.2, "total_tokens": 10897392}
|
| 3353 |
+
{"current_steps": 16680, "total_steps": 19080, "loss": 0.2706, "lr": 2.3733293103279153e-06, "epoch": 8.742138364779874, "percentage": 87.42, "elapsed_time": "1:17:10", "remaining_time": "0:11:06", "throughput": 2354.28, "total_tokens": 10901008}
|
| 3354 |
+
{"current_steps": 16685, "total_steps": 19080, "loss": 0.2411, "lr": 2.3636134758531604e-06, "epoch": 8.74475890985325, "percentage": 87.45, "elapsed_time": "1:17:11", "remaining_time": "0:11:04", "throughput": 2354.36, "total_tokens": 10904848}
|
| 3355 |
+
{"current_steps": 16690, "total_steps": 19080, "loss": 0.2201, "lr": 2.3539165824726565e-06, "epoch": 8.747379454926625, "percentage": 87.47, "elapsed_time": "1:17:13", "remaining_time": "0:11:03", "throughput": 2354.45, "total_tokens": 10908528}
|
| 3356 |
+
{"current_steps": 16695, "total_steps": 19080, "loss": 0.2785, "lr": 2.344238638300328e-06, "epoch": 8.75, "percentage": 87.5, "elapsed_time": "1:17:14", "remaining_time": "0:11:02", "throughput": 2354.51, "total_tokens": 10912080}
|
| 3357 |
+
{"current_steps": 16700, "total_steps": 19080, "loss": 0.2837, "lr": 2.334579651434235e-06, "epoch": 8.752620545073375, "percentage": 87.53, "elapsed_time": "1:17:16", "remaining_time": "0:11:00", "throughput": 2354.61, "total_tokens": 10916528}
|
| 3358 |
+
{"current_steps": 16705, "total_steps": 19080, "loss": 0.1919, "lr": 2.3249396299565683e-06, "epoch": 8.75524109014675, "percentage": 87.55, "elapsed_time": "1:17:17", "remaining_time": "0:10:59", "throughput": 2354.72, "total_tokens": 10920688}
|
| 3359 |
+
{"current_steps": 16710, "total_steps": 19080, "loss": 0.2429, "lr": 2.3153185819336705e-06, "epoch": 8.757861635220126, "percentage": 87.58, "elapsed_time": "1:17:18", "remaining_time": "0:10:57", "throughput": 2354.74, "total_tokens": 10923600}
|
| 3360 |
+
{"current_steps": 16715, "total_steps": 19080, "loss": 0.1835, "lr": 2.3057165154159873e-06, "epoch": 8.7604821802935, "percentage": 87.6, "elapsed_time": "1:17:20", "remaining_time": "0:10:56", "throughput": 2354.78, "total_tokens": 10926544}
|
| 3361 |
+
{"current_steps": 16720, "total_steps": 19080, "loss": 0.1913, "lr": 2.296133438438086e-06, "epoch": 8.763102725366876, "percentage": 87.63, "elapsed_time": "1:17:21", "remaining_time": "0:10:55", "throughput": 2354.8, "total_tokens": 10929392}
|
| 3362 |
+
{"current_steps": 16725, "total_steps": 19080, "loss": 0.194, "lr": 2.2865693590186616e-06, "epoch": 8.765723270440251, "percentage": 87.66, "elapsed_time": "1:17:22", "remaining_time": "0:10:53", "throughput": 2354.91, "total_tokens": 10933456}
|
| 3363 |
+
{"current_steps": 16730, "total_steps": 19080, "loss": 0.2955, "lr": 2.2770242851604813e-06, "epoch": 8.768343815513626, "percentage": 87.68, "elapsed_time": "1:17:24", "remaining_time": "0:10:52", "throughput": 2354.92, "total_tokens": 10936336}
|
| 3364 |
+
{"current_steps": 16735, "total_steps": 19080, "loss": 0.2308, "lr": 2.2674982248504395e-06, "epoch": 8.770964360587001, "percentage": 87.71, "elapsed_time": "1:17:25", "remaining_time": "0:10:50", "throughput": 2354.98, "total_tokens": 10939696}
|
| 3365 |
+
{"current_steps": 16740, "total_steps": 19080, "loss": 0.15, "lr": 2.257991186059502e-06, "epoch": 8.773584905660378, "percentage": 87.74, "elapsed_time": "1:17:26", "remaining_time": "0:10:49", "throughput": 2354.98, "total_tokens": 10942192}
|
| 3366 |
+
{"current_steps": 16745, "total_steps": 19080, "loss": 0.1341, "lr": 2.248503176742725e-06, "epoch": 8.776205450733752, "percentage": 87.76, "elapsed_time": "1:17:27", "remaining_time": "0:10:48", "throughput": 2354.97, "total_tokens": 10944624}
|
| 3367 |
+
{"current_steps": 16750, "total_steps": 19080, "loss": 0.1738, "lr": 2.2390342048392467e-06, "epoch": 8.778825995807129, "percentage": 87.79, "elapsed_time": "1:17:28", "remaining_time": "0:10:46", "throughput": 2355.03, "total_tokens": 10947952}
|
| 3368 |
+
{"current_steps": 16755, "total_steps": 19080, "loss": 0.2022, "lr": 2.229584278272265e-06, "epoch": 8.781446540880504, "percentage": 87.81, "elapsed_time": "1:17:30", "remaining_time": "0:10:45", "throughput": 2355.09, "total_tokens": 10951440}
|
| 3369 |
+
{"current_steps": 16760, "total_steps": 19080, "loss": 0.1404, "lr": 2.2201534049490436e-06, "epoch": 8.784067085953879, "percentage": 87.84, "elapsed_time": "1:17:31", "remaining_time": "0:10:43", "throughput": 2355.11, "total_tokens": 10954224}
|
| 3370 |
+
{"current_steps": 16765, "total_steps": 19080, "loss": 0.1948, "lr": 2.2107415927609176e-06, "epoch": 8.786687631027254, "percentage": 87.87, "elapsed_time": "1:17:32", "remaining_time": "0:10:42", "throughput": 2355.16, "total_tokens": 10957392}
|
| 3371 |
+
{"current_steps": 16770, "total_steps": 19080, "loss": 0.2789, "lr": 2.2013488495832542e-06, "epoch": 8.78930817610063, "percentage": 87.89, "elapsed_time": "1:17:33", "remaining_time": "0:10:41", "throughput": 2355.25, "total_tokens": 10961136}
|
| 3372 |
+
{"current_steps": 16775, "total_steps": 19080, "loss": 0.201, "lr": 2.1919751832754714e-06, "epoch": 8.791928721174004, "percentage": 87.92, "elapsed_time": "1:17:35", "remaining_time": "0:10:39", "throughput": 2355.29, "total_tokens": 10964272}
|
| 3373 |
+
{"current_steps": 16780, "total_steps": 19080, "loss": 0.1305, "lr": 2.182620601681029e-06, "epoch": 8.79454926624738, "percentage": 87.95, "elapsed_time": "1:17:36", "remaining_time": "0:10:38", "throughput": 2355.33, "total_tokens": 10967344}
|
| 3374 |
+
{"current_steps": 16785, "total_steps": 19080, "loss": 0.2082, "lr": 2.1732851126274047e-06, "epoch": 8.797169811320755, "percentage": 87.97, "elapsed_time": "1:17:37", "remaining_time": "0:10:36", "throughput": 2355.39, "total_tokens": 10970800}
|
| 3375 |
+
{"current_steps": 16790, "total_steps": 19080, "loss": 0.1872, "lr": 2.1639687239261214e-06, "epoch": 8.79979035639413, "percentage": 88.0, "elapsed_time": "1:17:39", "remaining_time": "0:10:35", "throughput": 2355.47, "total_tokens": 10974544}
|
| 3376 |
+
{"current_steps": 16795, "total_steps": 19080, "loss": 0.2327, "lr": 2.1546714433726993e-06, "epoch": 8.802410901467505, "percentage": 88.02, "elapsed_time": "1:17:40", "remaining_time": "0:10:34", "throughput": 2355.44, "total_tokens": 10976784}
|
| 3377 |
+
{"current_steps": 16800, "total_steps": 19080, "loss": 0.2663, "lr": 2.1453932787466767e-06, "epoch": 8.80503144654088, "percentage": 88.05, "elapsed_time": "1:17:41", "remaining_time": "0:10:32", "throughput": 2355.51, "total_tokens": 10980400}
|
| 3378 |
+
{"current_steps": 16805, "total_steps": 19080, "loss": 0.1614, "lr": 2.1361342378116072e-06, "epoch": 8.807651991614255, "percentage": 88.08, "elapsed_time": "1:17:43", "remaining_time": "0:10:31", "throughput": 2355.68, "total_tokens": 10986160}
|
| 3379 |
+
{"current_steps": 16810, "total_steps": 19080, "loss": 0.2199, "lr": 2.1268943283150294e-06, "epoch": 8.81027253668763, "percentage": 88.1, "elapsed_time": "1:17:45", "remaining_time": "0:10:29", "throughput": 2355.73, "total_tokens": 10989584}
|
| 3380 |
+
{"current_steps": 16815, "total_steps": 19080, "loss": 0.2702, "lr": 2.1176735579884753e-06, "epoch": 8.812893081761006, "percentage": 88.13, "elapsed_time": "1:17:46", "remaining_time": "0:10:28", "throughput": 2355.78, "total_tokens": 10992464}
|
| 3381 |
+
{"current_steps": 16820, "total_steps": 19080, "loss": 0.2637, "lr": 2.1084719345474597e-06, "epoch": 8.815513626834381, "percentage": 88.16, "elapsed_time": "1:17:47", "remaining_time": "0:10:27", "throughput": 2355.8, "total_tokens": 10995472}
|
| 3382 |
+
{"current_steps": 16825, "total_steps": 19080, "loss": 0.1911, "lr": 2.0992894656914895e-06, "epoch": 8.818134171907756, "percentage": 88.18, "elapsed_time": "1:17:48", "remaining_time": "0:10:25", "throughput": 2355.86, "total_tokens": 10999088}
|
| 3383 |
+
{"current_steps": 16830, "total_steps": 19080, "loss": 0.2172, "lr": 2.0901261591040333e-06, "epoch": 8.820754716981131, "percentage": 88.21, "elapsed_time": "1:17:49", "remaining_time": "0:10:24", "throughput": 2355.91, "total_tokens": 11002000}
|
| 3384 |
+
{"current_steps": 16835, "total_steps": 19080, "loss": 0.2884, "lr": 2.0809820224525213e-06, "epoch": 8.823375262054507, "percentage": 88.23, "elapsed_time": "1:17:51", "remaining_time": "0:10:22", "throughput": 2355.95, "total_tokens": 11005360}
|
| 3385 |
+
{"current_steps": 16840, "total_steps": 19080, "loss": 0.1858, "lr": 2.0718570633883576e-06, "epoch": 8.825995807127882, "percentage": 88.26, "elapsed_time": "1:17:52", "remaining_time": "0:10:21", "throughput": 2355.98, "total_tokens": 11008272}
|
| 3386 |
+
{"current_steps": 16845, "total_steps": 19080, "loss": 0.2105, "lr": 2.0627512895468883e-06, "epoch": 8.828616352201259, "percentage": 88.29, "elapsed_time": "1:17:53", "remaining_time": "0:10:20", "throughput": 2356.06, "total_tokens": 11011760}
|
| 3387 |
+
{"current_steps": 16850, "total_steps": 19080, "loss": 0.2381, "lr": 2.0536647085474037e-06, "epoch": 8.831236897274634, "percentage": 88.31, "elapsed_time": "1:17:55", "remaining_time": "0:10:18", "throughput": 2356.09, "total_tokens": 11014736}
|
| 3388 |
+
{"current_steps": 16855, "total_steps": 19080, "loss": 0.2126, "lr": 2.044597327993153e-06, "epoch": 8.833857442348009, "percentage": 88.34, "elapsed_time": "1:17:56", "remaining_time": "0:10:17", "throughput": 2356.15, "total_tokens": 11018128}
|
| 3389 |
+
{"current_steps": 16860, "total_steps": 19080, "loss": 0.1508, "lr": 2.035549155471289e-06, "epoch": 8.836477987421384, "percentage": 88.36, "elapsed_time": "1:17:57", "remaining_time": "0:10:15", "throughput": 2356.25, "total_tokens": 11022032}
|
| 3390 |
+
{"current_steps": 16865, "total_steps": 19080, "loss": 0.1926, "lr": 2.0265201985529226e-06, "epoch": 8.83909853249476, "percentage": 88.39, "elapsed_time": "1:17:59", "remaining_time": "0:10:14", "throughput": 2356.26, "total_tokens": 11024944}
|
| 3391 |
+
{"current_steps": 16870, "total_steps": 19080, "loss": 0.1836, "lr": 2.0175104647930655e-06, "epoch": 8.841719077568134, "percentage": 88.42, "elapsed_time": "1:18:00", "remaining_time": "0:10:13", "throughput": 2356.3, "total_tokens": 11028272}
|
| 3392 |
+
{"current_steps": 16875, "total_steps": 19080, "loss": 0.2175, "lr": 2.008519961730651e-06, "epoch": 8.84433962264151, "percentage": 88.44, "elapsed_time": "1:18:01", "remaining_time": "0:10:11", "throughput": 2356.3, "total_tokens": 11031088}
|
| 3393 |
+
{"current_steps": 16880, "total_steps": 19080, "loss": 0.2602, "lr": 1.9995486968885284e-06, "epoch": 8.846960167714885, "percentage": 88.47, "elapsed_time": "1:18:02", "remaining_time": "0:10:10", "throughput": 2356.27, "total_tokens": 11033456}
|
| 3394 |
+
{"current_steps": 16885, "total_steps": 19080, "loss": 0.2631, "lr": 1.990596677773435e-06, "epoch": 8.84958071278826, "percentage": 88.5, "elapsed_time": "1:18:03", "remaining_time": "0:10:08", "throughput": 2356.33, "total_tokens": 11036976}
|
| 3395 |
+
{"current_steps": 16890, "total_steps": 19080, "loss": 0.2348, "lr": 1.981663911876014e-06, "epoch": 8.852201257861635, "percentage": 88.52, "elapsed_time": "1:18:05", "remaining_time": "0:10:07", "throughput": 2356.32, "total_tokens": 11039472}
|
| 3396 |
+
{"current_steps": 16895, "total_steps": 19080, "loss": 0.2363, "lr": 1.972750406670801e-06, "epoch": 8.85482180293501, "percentage": 88.55, "elapsed_time": "1:18:06", "remaining_time": "0:10:06", "throughput": 2356.34, "total_tokens": 11042064}
|
| 3397 |
+
{"current_steps": 16900, "total_steps": 19080, "loss": 0.2432, "lr": 1.9638561696161962e-06, "epoch": 8.857442348008385, "percentage": 88.57, "elapsed_time": "1:18:07", "remaining_time": "0:10:04", "throughput": 2356.37, "total_tokens": 11045232}
|
| 3398 |
+
{"current_steps": 16905, "total_steps": 19080, "loss": 0.2107, "lr": 1.954981208154502e-06, "epoch": 8.86006289308176, "percentage": 88.6, "elapsed_time": "1:18:08", "remaining_time": "0:10:03", "throughput": 2356.44, "total_tokens": 11048784}
|
| 3399 |
+
{"current_steps": 16910, "total_steps": 19080, "loss": 0.2489, "lr": 1.9461255297118868e-06, "epoch": 8.862683438155136, "percentage": 88.63, "elapsed_time": "1:18:10", "remaining_time": "0:10:01", "throughput": 2356.53, "total_tokens": 11052528}
|
| 3400 |
+
{"current_steps": 16915, "total_steps": 19080, "loss": 0.3, "lr": 1.937289141698359e-06, "epoch": 8.865303983228511, "percentage": 88.65, "elapsed_time": "1:18:11", "remaining_time": "0:10:00", "throughput": 2356.56, "total_tokens": 11055824}
|
| 3401 |
+
{"current_steps": 16920, "total_steps": 19080, "loss": 0.1533, "lr": 1.928472051507821e-06, "epoch": 8.867924528301886, "percentage": 88.68, "elapsed_time": "1:18:12", "remaining_time": "0:09:59", "throughput": 2356.61, "total_tokens": 11059504}
|
| 3402 |
+
{"current_steps": 16925, "total_steps": 19080, "loss": 0.1658, "lr": 1.919674266518004e-06, "epoch": 8.870545073375261, "percentage": 88.71, "elapsed_time": "1:18:14", "remaining_time": "0:09:57", "throughput": 2356.64, "total_tokens": 11062544}
|
| 3403 |
+
{"current_steps": 16930, "total_steps": 19080, "loss": 0.2187, "lr": 1.910895794090492e-06, "epoch": 8.873165618448636, "percentage": 88.73, "elapsed_time": "1:18:15", "remaining_time": "0:09:56", "throughput": 2356.65, "total_tokens": 11065584}
|
| 3404 |
+
{"current_steps": 16935, "total_steps": 19080, "loss": 0.1502, "lr": 1.902136641570712e-06, "epoch": 8.875786163522012, "percentage": 88.76, "elapsed_time": "1:18:16", "remaining_time": "0:09:54", "throughput": 2356.67, "total_tokens": 11068336}
|
| 3405 |
+
{"current_steps": 16940, "total_steps": 19080, "loss": 0.1988, "lr": 1.8933968162879235e-06, "epoch": 8.878406708595389, "percentage": 88.78, "elapsed_time": "1:18:17", "remaining_time": "0:09:53", "throughput": 2356.7, "total_tokens": 11071184}
|
| 3406 |
+
{"current_steps": 16945, "total_steps": 19080, "loss": 0.1067, "lr": 1.8846763255552097e-06, "epoch": 8.881027253668764, "percentage": 88.81, "elapsed_time": "1:18:18", "remaining_time": "0:09:52", "throughput": 2356.76, "total_tokens": 11074320}
|
| 3407 |
+
{"current_steps": 16950, "total_steps": 19080, "loss": 0.1709, "lr": 1.8759751766694811e-06, "epoch": 8.883647798742139, "percentage": 88.84, "elapsed_time": "1:18:20", "remaining_time": "0:09:50", "throughput": 2356.78, "total_tokens": 11077168}
|
| 3408 |
+
{"current_steps": 16955, "total_steps": 19080, "loss": 0.2012, "lr": 1.8672933769114636e-06, "epoch": 8.886268343815514, "percentage": 88.86, "elapsed_time": "1:18:21", "remaining_time": "0:09:49", "throughput": 2356.8, "total_tokens": 11080080}
|
| 3409 |
+
{"current_steps": 16960, "total_steps": 19080, "loss": 0.1648, "lr": 1.8586309335456908e-06, "epoch": 8.88888888888889, "percentage": 88.89, "elapsed_time": "1:18:22", "remaining_time": "0:09:47", "throughput": 2356.85, "total_tokens": 11083408}
|
| 3410 |
+
{"current_steps": 16965, "total_steps": 19080, "loss": 0.256, "lr": 1.8499878538204951e-06, "epoch": 8.891509433962264, "percentage": 88.92, "elapsed_time": "1:18:24", "remaining_time": "0:09:46", "throughput": 2356.92, "total_tokens": 11087184}
|
| 3411 |
+
{"current_steps": 16970, "total_steps": 19080, "loss": 0.1358, "lr": 1.8413641449680081e-06, "epoch": 8.89412997903564, "percentage": 88.94, "elapsed_time": "1:18:25", "remaining_time": "0:09:45", "throughput": 2356.99, "total_tokens": 11090864}
|
| 3412 |
+
{"current_steps": 16975, "total_steps": 19080, "loss": 0.151, "lr": 1.8327598142041658e-06, "epoch": 8.896750524109015, "percentage": 88.97, "elapsed_time": "1:18:26", "remaining_time": "0:09:43", "throughput": 2357.05, "total_tokens": 11094288}
|
| 3413 |
+
{"current_steps": 16980, "total_steps": 19080, "loss": 0.1296, "lr": 1.824174868728673e-06, "epoch": 8.89937106918239, "percentage": 88.99, "elapsed_time": "1:18:27", "remaining_time": "0:09:42", "throughput": 2357.02, "total_tokens": 11096656}
|
| 3414 |
+
{"current_steps": 16985, "total_steps": 19080, "loss": 0.2047, "lr": 1.815609315725017e-06, "epoch": 8.901991614255765, "percentage": 89.02, "elapsed_time": "1:18:29", "remaining_time": "0:09:40", "throughput": 2357.09, "total_tokens": 11100144}
|
| 3415 |
+
{"current_steps": 16990, "total_steps": 19080, "loss": 0.2016, "lr": 1.80706316236047e-06, "epoch": 8.90461215932914, "percentage": 89.05, "elapsed_time": "1:18:30", "remaining_time": "0:09:39", "throughput": 2357.13, "total_tokens": 11103440}
|
| 3416 |
+
{"current_steps": 16995, "total_steps": 19080, "loss": 0.182, "lr": 1.7985364157860562e-06, "epoch": 8.907232704402515, "percentage": 89.07, "elapsed_time": "1:18:31", "remaining_time": "0:09:38", "throughput": 2357.19, "total_tokens": 11107056}
|
| 3417 |
+
{"current_steps": 17000, "total_steps": 19080, "loss": 0.1837, "lr": 1.7900290831365713e-06, "epoch": 8.90985324947589, "percentage": 89.1, "elapsed_time": "1:18:33", "remaining_time": "0:09:36", "throughput": 2357.25, "total_tokens": 11110640}
|
| 3418 |
+
{"current_steps": 17005, "total_steps": 19080, "loss": 0.1504, "lr": 1.781541171530554e-06, "epoch": 8.912473794549266, "percentage": 89.12, "elapsed_time": "1:18:35", "remaining_time": "0:09:35", "throughput": 2357.46, "total_tokens": 11117008}
|
| 3419 |
+
{"current_steps": 17010, "total_steps": 19080, "loss": 0.1582, "lr": 1.7730726880703125e-06, "epoch": 8.915094339622641, "percentage": 89.15, "elapsed_time": "1:18:36", "remaining_time": "0:09:34", "throughput": 2357.51, "total_tokens": 11120176}
|
| 3420 |
+
{"current_steps": 17015, "total_steps": 19080, "loss": 0.2069, "lr": 1.7646236398418835e-06, "epoch": 8.917714884696016, "percentage": 89.18, "elapsed_time": "1:18:38", "remaining_time": "0:09:32", "throughput": 2357.56, "total_tokens": 11123504}
|
| 3421 |
+
{"current_steps": 17020, "total_steps": 19080, "loss": 0.2937, "lr": 1.7561940339150373e-06, "epoch": 8.920335429769391, "percentage": 89.2, "elapsed_time": "1:18:39", "remaining_time": "0:09:31", "throughput": 2357.54, "total_tokens": 11126000}
|
| 3422 |
+
{"current_steps": 17025, "total_steps": 19080, "loss": 0.1786, "lr": 1.7477838773432926e-06, "epoch": 8.922955974842766, "percentage": 89.23, "elapsed_time": "1:18:40", "remaining_time": "0:09:29", "throughput": 2357.61, "total_tokens": 11129584}
|
| 3423 |
+
{"current_steps": 17030, "total_steps": 19080, "loss": 0.2408, "lr": 1.7393931771638839e-06, "epoch": 8.925576519916142, "percentage": 89.26, "elapsed_time": "1:18:41", "remaining_time": "0:09:28", "throughput": 2357.63, "total_tokens": 11132240}
|
| 3424 |
+
{"current_steps": 17035, "total_steps": 19080, "loss": 0.2077, "lr": 1.7310219403977563e-06, "epoch": 8.928197064989519, "percentage": 89.28, "elapsed_time": "1:18:42", "remaining_time": "0:09:26", "throughput": 2357.67, "total_tokens": 11135120}
|
| 3425 |
+
{"current_steps": 17040, "total_steps": 19080, "loss": 0.1225, "lr": 1.7226701740495926e-06, "epoch": 8.930817610062894, "percentage": 89.31, "elapsed_time": "1:18:44", "remaining_time": "0:09:25", "throughput": 2357.69, "total_tokens": 11137968}
|
| 3426 |
+
{"current_steps": 17045, "total_steps": 19080, "loss": 0.1799, "lr": 1.714337885107753e-06, "epoch": 8.933438155136269, "percentage": 89.33, "elapsed_time": "1:18:45", "remaining_time": "0:09:24", "throughput": 2357.67, "total_tokens": 11140368}
|
| 3427 |
+
{"current_steps": 17050, "total_steps": 19080, "loss": 0.255, "lr": 1.7060250805443296e-06, "epoch": 8.936058700209644, "percentage": 89.36, "elapsed_time": "1:18:46", "remaining_time": "0:09:22", "throughput": 2357.82, "total_tokens": 11145168}
|
| 3428 |
+
{"current_steps": 17055, "total_steps": 19080, "loss": 0.2406, "lr": 1.6977317673150916e-06, "epoch": 8.93867924528302, "percentage": 89.39, "elapsed_time": "1:18:48", "remaining_time": "0:09:21", "throughput": 2357.9, "total_tokens": 11149008}
|
| 3429 |
+
{"current_steps": 17060, "total_steps": 19080, "loss": 0.1988, "lr": 1.6894579523595022e-06, "epoch": 8.941299790356394, "percentage": 89.41, "elapsed_time": "1:18:50", "remaining_time": "0:09:20", "throughput": 2358.14, "total_tokens": 11156336}
|
| 3430 |
+
{"current_steps": 17065, "total_steps": 19080, "loss": 0.2747, "lr": 1.6812036426007176e-06, "epoch": 8.94392033542977, "percentage": 89.44, "elapsed_time": "1:18:52", "remaining_time": "0:09:18", "throughput": 2358.15, "total_tokens": 11159152}
|
| 3431 |
+
{"current_steps": 17070, "total_steps": 19080, "loss": 0.2063, "lr": 1.6729688449455689e-06, "epoch": 8.946540880503145, "percentage": 89.47, "elapsed_time": "1:18:53", "remaining_time": "0:09:17", "throughput": 2358.18, "total_tokens": 11162096}
|
| 3432 |
+
{"current_steps": 17075, "total_steps": 19080, "loss": 0.1796, "lr": 1.6647535662845466e-06, "epoch": 8.94916142557652, "percentage": 89.49, "elapsed_time": "1:18:54", "remaining_time": "0:09:15", "throughput": 2358.23, "total_tokens": 11165200}
|
| 3433 |
+
{"current_steps": 17080, "total_steps": 19080, "loss": 0.2749, "lr": 1.656557813491838e-06, "epoch": 8.951781970649895, "percentage": 89.52, "elapsed_time": "1:18:55", "remaining_time": "0:09:14", "throughput": 2358.27, "total_tokens": 11168368}
|
| 3434 |
+
{"current_steps": 17085, "total_steps": 19080, "loss": 0.2703, "lr": 1.6483815934252578e-06, "epoch": 8.95440251572327, "percentage": 89.54, "elapsed_time": "1:18:57", "remaining_time": "0:09:13", "throughput": 2358.36, "total_tokens": 11172048}
|
| 3435 |
+
{"current_steps": 17090, "total_steps": 19080, "loss": 0.2129, "lr": 1.6402249129263025e-06, "epoch": 8.957023060796645, "percentage": 89.57, "elapsed_time": "1:18:58", "remaining_time": "0:09:11", "throughput": 2358.41, "total_tokens": 11175600}
|
| 3436 |
+
{"current_steps": 17095, "total_steps": 19080, "loss": 0.193, "lr": 1.6320877788201127e-06, "epoch": 8.95964360587002, "percentage": 89.6, "elapsed_time": "1:18:59", "remaining_time": "0:09:10", "throughput": 2358.43, "total_tokens": 11178576}
|
| 3437 |
+
{"current_steps": 17100, "total_steps": 19080, "loss": 0.2514, "lr": 1.6239701979154614e-06, "epoch": 8.962264150943396, "percentage": 89.62, "elapsed_time": "1:19:00", "remaining_time": "0:09:08", "throughput": 2358.41, "total_tokens": 11181168}
|
| 3438 |
+
{"current_steps": 17105, "total_steps": 19080, "loss": 0.16, "lr": 1.6158721770047762e-06, "epoch": 8.964884696016771, "percentage": 89.65, "elapsed_time": "1:19:02", "remaining_time": "0:09:07", "throughput": 2358.45, "total_tokens": 11184400}
|
| 3439 |
+
{"current_steps": 17110, "total_steps": 19080, "loss": 0.272, "lr": 1.6077937228641093e-06, "epoch": 8.967505241090146, "percentage": 89.68, "elapsed_time": "1:19:03", "remaining_time": "0:09:06", "throughput": 2358.49, "total_tokens": 11187536}
|
| 3440 |
+
{"current_steps": 17115, "total_steps": 19080, "loss": 0.223, "lr": 1.5997348422531395e-06, "epoch": 8.970125786163521, "percentage": 89.7, "elapsed_time": "1:19:05", "remaining_time": "0:09:04", "throughput": 2358.55, "total_tokens": 11191568}
|
| 3441 |
+
{"current_steps": 17120, "total_steps": 19080, "loss": 0.1713, "lr": 1.5916955419151725e-06, "epoch": 8.972746331236896, "percentage": 89.73, "elapsed_time": "1:19:06", "remaining_time": "0:09:03", "throughput": 2358.6, "total_tokens": 11195024}
|
| 3442 |
+
{"current_steps": 17125, "total_steps": 19080, "loss": 0.2585, "lr": 1.5836758285771303e-06, "epoch": 8.975366876310272, "percentage": 89.75, "elapsed_time": "1:19:07", "remaining_time": "0:09:02", "throughput": 2358.65, "total_tokens": 11198480}
|
| 3443 |
+
{"current_steps": 17130, "total_steps": 19080, "loss": 0.2272, "lr": 1.5756757089495366e-06, "epoch": 8.977987421383649, "percentage": 89.78, "elapsed_time": "1:19:09", "remaining_time": "0:09:00", "throughput": 2358.71, "total_tokens": 11201712}
|
| 3444 |
+
{"current_steps": 17135, "total_steps": 19080, "loss": 0.164, "lr": 1.5676951897265313e-06, "epoch": 8.980607966457024, "percentage": 89.81, "elapsed_time": "1:19:10", "remaining_time": "0:08:59", "throughput": 2358.75, "total_tokens": 11204720}
|
| 3445 |
+
{"current_steps": 17140, "total_steps": 19080, "loss": 0.241, "lr": 1.5597342775858476e-06, "epoch": 8.983228511530399, "percentage": 89.83, "elapsed_time": "1:19:11", "remaining_time": "0:08:57", "throughput": 2358.77, "total_tokens": 11207440}
|
| 3446 |
+
{"current_steps": 17145, "total_steps": 19080, "loss": 0.2196, "lr": 1.5517929791888125e-06, "epoch": 8.985849056603774, "percentage": 89.86, "elapsed_time": "1:19:12", "remaining_time": "0:08:56", "throughput": 2358.82, "total_tokens": 11210384}
|
| 3447 |
+
{"current_steps": 17150, "total_steps": 19080, "loss": 0.2042, "lr": 1.5438713011803385e-06, "epoch": 8.98846960167715, "percentage": 89.88, "elapsed_time": "1:19:13", "remaining_time": "0:08:54", "throughput": 2358.84, "total_tokens": 11213392}
|
| 3448 |
+
{"current_steps": 17155, "total_steps": 19080, "loss": 0.2406, "lr": 1.535969250188926e-06, "epoch": 8.991090146750524, "percentage": 89.91, "elapsed_time": "1:19:14", "remaining_time": "0:08:53", "throughput": 2358.84, "total_tokens": 11216016}
|
| 3449 |
+
{"current_steps": 17160, "total_steps": 19080, "loss": 0.2404, "lr": 1.5280868328266528e-06, "epoch": 8.9937106918239, "percentage": 89.94, "elapsed_time": "1:19:15", "remaining_time": "0:08:52", "throughput": 2358.79, "total_tokens": 11218160}
|
| 3450 |
+
{"current_steps": 17165, "total_steps": 19080, "loss": 0.3821, "lr": 1.520224055689165e-06, "epoch": 8.996331236897275, "percentage": 89.96, "elapsed_time": "1:19:17", "remaining_time": "0:08:50", "throughput": 2358.86, "total_tokens": 11222000}
|
| 3451 |
+
{"current_steps": 17170, "total_steps": 19080, "loss": 0.2188, "lr": 1.5123809253556692e-06, "epoch": 8.99895178197065, "percentage": 89.99, "elapsed_time": "1:19:18", "remaining_time": "0:08:49", "throughput": 2358.9, "total_tokens": 11224816}
|
| 3452 |
+
{"current_steps": 17172, "total_steps": 19080, "eval_loss": 0.6604991555213928, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "1:19:34", "remaining_time": "0:08:50", "throughput": 2350.9, "total_tokens": 11225416}
|
| 3453 |
+
{"current_steps": 17175, "total_steps": 19080, "loss": 0.247, "lr": 1.5045574483889463e-06, "epoch": 9.001572327044025, "percentage": 90.02, "elapsed_time": "1:19:37", "remaining_time": "0:08:49", "throughput": 2350.15, "total_tokens": 11227112}
|
| 3454 |
+
{"current_steps": 17180, "total_steps": 19080, "loss": 0.1861, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "1:19:38", "remaining_time": "0:08:48", "throughput": 2350.15, "total_tokens": 11229544}
|
| 3455 |
+
{"current_steps": 17185, "total_steps": 19080, "loss": 0.2323, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "1:19:39", "remaining_time": "0:08:47", "throughput": 2350.24, "total_tokens": 11233640}
|
| 3456 |
+
{"current_steps": 17190, "total_steps": 19080, "loss": 0.1362, "lr": 1.481205003070424e-06, "epoch": 9.00943396226415, "percentage": 90.09, "elapsed_time": "1:19:41", "remaining_time": "0:08:45", "throughput": 2350.24, "total_tokens": 11236552}
|