rbelanec commited on
Commit
cf6a5b7
·
verified ·
1 Parent(s): cb556ba

Training in progress, step 7434

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +165 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad2d09268c5f01d9a49698d2838cf519fa70fbf7eb162da42d09ff1cf72a8980
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a79dd398e2ccd18c8ee1e384d99c7793c47922d1fe4049cdc1c660d33b6608d
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -1337,3 +1337,168 @@
1337
  {"current_steps": 6645, "total_steps": 16520, "loss": 0.0, "lr": 3.73349487217594e-05, "epoch": 8.044794188861985, "percentage": 40.22, "elapsed_time": "0:23:01", "remaining_time": "0:34:13", "throughput": 1970.12, "total_tokens": 2721720}
1338
  {"current_steps": 6650, "total_steps": 16520, "loss": 0.0, "lr": 3.731196826712805e-05, "epoch": 8.05084745762712, "percentage": 40.25, "elapsed_time": "0:23:02", "remaining_time": "0:34:11", "throughput": 1970.19, "total_tokens": 2723800}
1339
  {"current_steps": 6655, "total_steps": 16520, "loss": 0.0001, "lr": 3.7288974070112174e-05, "epoch": 8.056900726392252, "percentage": 40.28, "elapsed_time": "0:23:03", "remaining_time": "0:34:10", "throughput": 1970.25, "total_tokens": 2725848}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1337
  {"current_steps": 6645, "total_steps": 16520, "loss": 0.0, "lr": 3.73349487217594e-05, "epoch": 8.044794188861985, "percentage": 40.22, "elapsed_time": "0:23:01", "remaining_time": "0:34:13", "throughput": 1970.12, "total_tokens": 2721720}
1338
  {"current_steps": 6650, "total_steps": 16520, "loss": 0.0, "lr": 3.731196826712805e-05, "epoch": 8.05084745762712, "percentage": 40.25, "elapsed_time": "0:23:02", "remaining_time": "0:34:11", "throughput": 1970.19, "total_tokens": 2723800}
1339
  {"current_steps": 6655, "total_steps": 16520, "loss": 0.0001, "lr": 3.7288974070112174e-05, "epoch": 8.056900726392252, "percentage": 40.28, "elapsed_time": "0:23:03", "remaining_time": "0:34:10", "throughput": 1970.25, "total_tokens": 2725848}
1340
+ {"current_steps": 6660, "total_steps": 16520, "loss": 0.0, "lr": 3.726596615637746e-05, "epoch": 8.062953995157384, "percentage": 40.31, "elapsed_time": "0:23:04", "remaining_time": "0:34:09", "throughput": 1970.25, "total_tokens": 2727832}
1341
+ {"current_steps": 6665, "total_steps": 16520, "loss": 0.0037, "lr": 3.7242944551604914e-05, "epoch": 8.069007263922519, "percentage": 40.35, "elapsed_time": "0:23:05", "remaining_time": "0:34:08", "throughput": 1970.23, "total_tokens": 2729752}
1342
+ {"current_steps": 6670, "total_steps": 16520, "loss": 0.0, "lr": 3.72199092814908e-05, "epoch": 8.075060532687651, "percentage": 40.38, "elapsed_time": "0:23:06", "remaining_time": "0:34:07", "throughput": 1970.33, "total_tokens": 2731864}
1343
+ {"current_steps": 6675, "total_steps": 16520, "loss": 0.0, "lr": 3.719686037174664e-05, "epoch": 8.081113801452785, "percentage": 40.41, "elapsed_time": "0:23:07", "remaining_time": "0:34:06", "throughput": 1970.34, "total_tokens": 2733816}
1344
+ {"current_steps": 6680, "total_steps": 16520, "loss": 0.0, "lr": 3.7173797848099204e-05, "epoch": 8.087167070217918, "percentage": 40.44, "elapsed_time": "0:23:08", "remaining_time": "0:34:05", "throughput": 1970.37, "total_tokens": 2735832}
1345
+ {"current_steps": 6685, "total_steps": 16520, "loss": 0.0, "lr": 3.715072173629043e-05, "epoch": 8.09322033898305, "percentage": 40.47, "elapsed_time": "0:23:09", "remaining_time": "0:34:04", "throughput": 1970.52, "total_tokens": 2738040}
1346
+ {"current_steps": 6690, "total_steps": 16520, "loss": 0.0039, "lr": 3.7127632062077446e-05, "epoch": 8.099273607748184, "percentage": 40.5, "elapsed_time": "0:23:10", "remaining_time": "0:34:03", "throughput": 1970.59, "total_tokens": 2740088}
1347
+ {"current_steps": 6695, "total_steps": 16520, "loss": 0.0025, "lr": 3.7104528851232496e-05, "epoch": 8.105326876513317, "percentage": 40.53, "elapsed_time": "0:23:11", "remaining_time": "0:34:02", "throughput": 1970.65, "total_tokens": 2742136}
1348
+ {"current_steps": 6700, "total_steps": 16520, "loss": 0.127, "lr": 3.708141212954295e-05, "epoch": 8.111380145278451, "percentage": 40.56, "elapsed_time": "0:23:12", "remaining_time": "0:34:00", "throughput": 1970.73, "total_tokens": 2744216}
1349
+ {"current_steps": 6705, "total_steps": 16520, "loss": 0.0001, "lr": 3.705828192281126e-05, "epoch": 8.117433414043584, "percentage": 40.59, "elapsed_time": "0:23:13", "remaining_time": "0:33:59", "throughput": 1970.75, "total_tokens": 2746200}
1350
+ {"current_steps": 6710, "total_steps": 16520, "loss": 0.0002, "lr": 3.703513825685489e-05, "epoch": 8.123486682808716, "percentage": 40.62, "elapsed_time": "0:23:14", "remaining_time": "0:33:58", "throughput": 1970.82, "total_tokens": 2748248}
1351
+ {"current_steps": 6715, "total_steps": 16520, "loss": 0.0004, "lr": 3.7011981157506405e-05, "epoch": 8.12953995157385, "percentage": 40.65, "elapsed_time": "0:23:15", "remaining_time": "0:33:57", "throughput": 1970.89, "total_tokens": 2750296}
1352
+ {"current_steps": 6720, "total_steps": 16520, "loss": 0.0406, "lr": 3.6988810650613286e-05, "epoch": 8.135593220338983, "percentage": 40.68, "elapsed_time": "0:23:16", "remaining_time": "0:33:56", "throughput": 1970.91, "total_tokens": 2752280}
1353
+ {"current_steps": 6725, "total_steps": 16520, "loss": 0.0004, "lr": 3.6965626762038024e-05, "epoch": 8.141646489104117, "percentage": 40.71, "elapsed_time": "0:23:17", "remaining_time": "0:33:55", "throughput": 1971.02, "total_tokens": 2754424}
1354
+ {"current_steps": 6730, "total_steps": 16520, "loss": 0.0204, "lr": 3.694242951765803e-05, "epoch": 8.14769975786925, "percentage": 40.74, "elapsed_time": "0:23:18", "remaining_time": "0:33:54", "throughput": 1970.96, "total_tokens": 2756312}
1355
+ {"current_steps": 6735, "total_steps": 16520, "loss": 0.0004, "lr": 3.691921894336563e-05, "epoch": 8.153753026634382, "percentage": 40.77, "elapsed_time": "0:23:19", "remaining_time": "0:33:53", "throughput": 1971.0, "total_tokens": 2758328}
1356
+ {"current_steps": 6740, "total_steps": 16520, "loss": 0.0001, "lr": 3.689599506506802e-05, "epoch": 8.159806295399516, "percentage": 40.8, "elapsed_time": "0:23:20", "remaining_time": "0:33:52", "throughput": 1971.07, "total_tokens": 2760408}
1357
+ {"current_steps": 6745, "total_steps": 16520, "loss": 0.0001, "lr": 3.6872757908687255e-05, "epoch": 8.165859564164649, "percentage": 40.83, "elapsed_time": "0:23:21", "remaining_time": "0:33:51", "throughput": 1971.08, "total_tokens": 2762392}
1358
+ {"current_steps": 6750, "total_steps": 16520, "loss": 0.0002, "lr": 3.684950750016021e-05, "epoch": 8.171912832929783, "percentage": 40.86, "elapsed_time": "0:23:22", "remaining_time": "0:33:49", "throughput": 1971.14, "total_tokens": 2764472}
1359
+ {"current_steps": 6755, "total_steps": 16520, "loss": 0.0002, "lr": 3.6826243865438534e-05, "epoch": 8.177966101694915, "percentage": 40.89, "elapsed_time": "0:23:23", "remaining_time": "0:33:48", "throughput": 1971.21, "total_tokens": 2766552}
1360
+ {"current_steps": 6760, "total_steps": 16520, "loss": 0.0002, "lr": 3.680296703048867e-05, "epoch": 8.184019370460048, "percentage": 40.92, "elapsed_time": "0:23:24", "remaining_time": "0:33:47", "throughput": 1971.29, "total_tokens": 2768664}
1361
+ {"current_steps": 6765, "total_steps": 16520, "loss": 0.0001, "lr": 3.677967702129177e-05, "epoch": 8.190072639225182, "percentage": 40.95, "elapsed_time": "0:23:25", "remaining_time": "0:33:46", "throughput": 1971.36, "total_tokens": 2770744}
1362
+ {"current_steps": 6770, "total_steps": 16520, "loss": 0.0007, "lr": 3.67563738638437e-05, "epoch": 8.196125907990314, "percentage": 40.98, "elapsed_time": "0:23:26", "remaining_time": "0:33:45", "throughput": 1971.35, "total_tokens": 2772664}
1363
+ {"current_steps": 6775, "total_steps": 16520, "loss": 0.0, "lr": 3.673305758415499e-05, "epoch": 8.202179176755449, "percentage": 41.01, "elapsed_time": "0:23:27", "remaining_time": "0:33:44", "throughput": 1971.41, "total_tokens": 2774744}
1364
+ {"current_steps": 6780, "total_steps": 16520, "loss": 0.0001, "lr": 3.6709728208250845e-05, "epoch": 8.208232445520581, "percentage": 41.04, "elapsed_time": "0:23:28", "remaining_time": "0:33:43", "throughput": 1971.53, "total_tokens": 2776920}
1365
+ {"current_steps": 6785, "total_steps": 16520, "loss": 0.0001, "lr": 3.6686385762171055e-05, "epoch": 8.214285714285714, "percentage": 41.07, "elapsed_time": "0:23:29", "remaining_time": "0:33:42", "throughput": 1971.64, "total_tokens": 2779064}
1366
+ {"current_steps": 6790, "total_steps": 16520, "loss": 0.0001, "lr": 3.666303027197003e-05, "epoch": 8.220338983050848, "percentage": 41.1, "elapsed_time": "0:23:30", "remaining_time": "0:33:41", "throughput": 1971.74, "total_tokens": 2781176}
1367
+ {"current_steps": 6795, "total_steps": 16520, "loss": 0.0001, "lr": 3.663966176371671e-05, "epoch": 8.22639225181598, "percentage": 41.13, "elapsed_time": "0:23:31", "remaining_time": "0:33:40", "throughput": 1971.83, "total_tokens": 2783288}
1368
+ {"current_steps": 6800, "total_steps": 16520, "loss": 0.0012, "lr": 3.661628026349458e-05, "epoch": 8.232445520581114, "percentage": 41.16, "elapsed_time": "0:23:32", "remaining_time": "0:33:39", "throughput": 1971.93, "total_tokens": 2785400}
1369
+ {"current_steps": 6805, "total_steps": 16520, "loss": 0.0015, "lr": 3.659288579740163e-05, "epoch": 8.238498789346247, "percentage": 41.19, "elapsed_time": "0:23:33", "remaining_time": "0:33:37", "throughput": 1972.01, "total_tokens": 2787480}
1370
+ {"current_steps": 6810, "total_steps": 16520, "loss": 0.0, "lr": 3.656947839155032e-05, "epoch": 8.24455205811138, "percentage": 41.22, "elapsed_time": "0:23:34", "remaining_time": "0:33:36", "throughput": 1972.07, "total_tokens": 2789528}
1371
+ {"current_steps": 6815, "total_steps": 16520, "loss": 0.0, "lr": 3.654605807206754e-05, "epoch": 8.250605326876514, "percentage": 41.25, "elapsed_time": "0:23:35", "remaining_time": "0:33:35", "throughput": 1972.12, "total_tokens": 2791576}
1372
+ {"current_steps": 6820, "total_steps": 16520, "loss": 0.0005, "lr": 3.652262486509462e-05, "epoch": 8.256658595641646, "percentage": 41.28, "elapsed_time": "0:23:36", "remaining_time": "0:33:34", "throughput": 1972.21, "total_tokens": 2793688}
1373
+ {"current_steps": 6825, "total_steps": 16520, "loss": 0.0568, "lr": 3.649917879678724e-05, "epoch": 8.26271186440678, "percentage": 41.31, "elapsed_time": "0:23:37", "remaining_time": "0:33:33", "throughput": 1972.27, "total_tokens": 2795736}
1374
+ {"current_steps": 6830, "total_steps": 16520, "loss": 0.0, "lr": 3.647571989331548e-05, "epoch": 8.268765133171913, "percentage": 41.34, "elapsed_time": "0:23:38", "remaining_time": "0:33:32", "throughput": 1972.37, "total_tokens": 2797848}
1375
+ {"current_steps": 6835, "total_steps": 16520, "loss": 0.0978, "lr": 3.6452248180863694e-05, "epoch": 8.274818401937045, "percentage": 41.37, "elapsed_time": "0:23:39", "remaining_time": "0:33:31", "throughput": 1972.45, "total_tokens": 2799928}
1376
+ {"current_steps": 6840, "total_steps": 16520, "loss": 0.0001, "lr": 3.642876368563059e-05, "epoch": 8.28087167070218, "percentage": 41.4, "elapsed_time": "0:23:40", "remaining_time": "0:33:30", "throughput": 1972.49, "total_tokens": 2801944}
1377
+ {"current_steps": 6845, "total_steps": 16520, "loss": 0.0004, "lr": 3.6405266433829075e-05, "epoch": 8.286924939467312, "percentage": 41.43, "elapsed_time": "0:23:41", "remaining_time": "0:33:29", "throughput": 1972.56, "total_tokens": 2804024}
1378
+ {"current_steps": 6850, "total_steps": 16520, "loss": 0.0002, "lr": 3.6381756451686375e-05, "epoch": 8.292978208232446, "percentage": 41.46, "elapsed_time": "0:23:42", "remaining_time": "0:33:28", "throughput": 1972.7, "total_tokens": 2806200}
1379
+ {"current_steps": 6855, "total_steps": 16520, "loss": 0.0001, "lr": 3.635823376544385e-05, "epoch": 8.299031476997579, "percentage": 41.5, "elapsed_time": "0:23:43", "remaining_time": "0:33:27", "throughput": 1972.72, "total_tokens": 2808184}
1380
+ {"current_steps": 6860, "total_steps": 16520, "loss": 0.0005, "lr": 3.6334698401357107e-05, "epoch": 8.305084745762711, "percentage": 41.53, "elapsed_time": "0:23:44", "remaining_time": "0:33:25", "throughput": 1972.7, "total_tokens": 2810104}
1381
+ {"current_steps": 6865, "total_steps": 16520, "loss": 0.0001, "lr": 3.6311150385695845e-05, "epoch": 8.311138014527845, "percentage": 41.56, "elapsed_time": "0:23:45", "remaining_time": "0:33:24", "throughput": 1972.75, "total_tokens": 2812120}
1382
+ {"current_steps": 6870, "total_steps": 16520, "loss": 0.0001, "lr": 3.6287589744743925e-05, "epoch": 8.317191283292978, "percentage": 41.59, "elapsed_time": "0:23:46", "remaining_time": "0:33:23", "throughput": 1972.84, "total_tokens": 2814232}
1383
+ {"current_steps": 6875, "total_steps": 16520, "loss": 0.0004, "lr": 3.6264016504799274e-05, "epoch": 8.323244552058112, "percentage": 41.62, "elapsed_time": "0:23:47", "remaining_time": "0:33:22", "throughput": 1972.86, "total_tokens": 2816216}
1384
+ {"current_steps": 6880, "total_steps": 16520, "loss": 0.0, "lr": 3.624043069217391e-05, "epoch": 8.329297820823244, "percentage": 41.65, "elapsed_time": "0:23:48", "remaining_time": "0:33:21", "throughput": 1972.95, "total_tokens": 2818296}
1385
+ {"current_steps": 6885, "total_steps": 16520, "loss": 0.0003, "lr": 3.621683233319386e-05, "epoch": 8.335351089588377, "percentage": 41.68, "elapsed_time": "0:23:49", "remaining_time": "0:33:20", "throughput": 1972.99, "total_tokens": 2820312}
1386
+ {"current_steps": 6890, "total_steps": 16520, "loss": 0.0033, "lr": 3.619322145419915e-05, "epoch": 8.341404358353511, "percentage": 41.71, "elapsed_time": "0:23:50", "remaining_time": "0:33:19", "throughput": 1973.14, "total_tokens": 2822520}
1387
+ {"current_steps": 6895, "total_steps": 16520, "loss": 0.0, "lr": 3.616959808154381e-05, "epoch": 8.347457627118644, "percentage": 41.74, "elapsed_time": "0:23:51", "remaining_time": "0:33:18", "throughput": 1973.24, "total_tokens": 2824696}
1388
+ {"current_steps": 6900, "total_steps": 16520, "loss": 0.0, "lr": 3.61459622415958e-05, "epoch": 8.353510895883778, "percentage": 41.77, "elapsed_time": "0:23:52", "remaining_time": "0:33:17", "throughput": 1973.29, "total_tokens": 2826744}
1389
+ {"current_steps": 6905, "total_steps": 16520, "loss": 0.0, "lr": 3.6122313960736983e-05, "epoch": 8.35956416464891, "percentage": 41.8, "elapsed_time": "0:23:53", "remaining_time": "0:33:16", "throughput": 1973.35, "total_tokens": 2828792}
1390
+ {"current_steps": 6910, "total_steps": 16520, "loss": 0.0, "lr": 3.609865326536312e-05, "epoch": 8.365617433414043, "percentage": 41.83, "elapsed_time": "0:23:54", "remaining_time": "0:33:15", "throughput": 1973.44, "total_tokens": 2830904}
1391
+ {"current_steps": 6915, "total_steps": 16520, "loss": 0.0, "lr": 3.607498018188385e-05, "epoch": 8.371670702179177, "percentage": 41.86, "elapsed_time": "0:23:55", "remaining_time": "0:33:13", "throughput": 1973.52, "total_tokens": 2832984}
1392
+ {"current_steps": 6920, "total_steps": 16520, "loss": 0.0, "lr": 3.605129473672259e-05, "epoch": 8.37772397094431, "percentage": 41.89, "elapsed_time": "0:23:56", "remaining_time": "0:33:12", "throughput": 1973.56, "total_tokens": 2835000}
1393
+ {"current_steps": 6925, "total_steps": 16520, "loss": 0.083, "lr": 3.602759695631659e-05, "epoch": 8.383777239709444, "percentage": 41.92, "elapsed_time": "0:23:57", "remaining_time": "0:33:11", "throughput": 1973.62, "total_tokens": 2837048}
1394
+ {"current_steps": 6930, "total_steps": 16520, "loss": 0.0001, "lr": 3.6003886867116875e-05, "epoch": 8.389830508474576, "percentage": 41.95, "elapsed_time": "0:23:58", "remaining_time": "0:33:10", "throughput": 1973.71, "total_tokens": 2839160}
1395
+ {"current_steps": 6935, "total_steps": 16520, "loss": 0.0102, "lr": 3.5980164495588176e-05, "epoch": 8.39588377723971, "percentage": 41.98, "elapsed_time": "0:23:59", "remaining_time": "0:33:09", "throughput": 1973.77, "total_tokens": 2841208}
1396
+ {"current_steps": 6940, "total_steps": 16520, "loss": 0.0, "lr": 3.5956429868208974e-05, "epoch": 8.401937046004843, "percentage": 42.01, "elapsed_time": "0:24:00", "remaining_time": "0:33:08", "throughput": 1973.87, "total_tokens": 2843320}
1397
+ {"current_steps": 6945, "total_steps": 16520, "loss": 0.0003, "lr": 3.593268301147139e-05, "epoch": 8.407990314769975, "percentage": 42.04, "elapsed_time": "0:24:01", "remaining_time": "0:33:07", "throughput": 1973.92, "total_tokens": 2845368}
1398
+ {"current_steps": 6950, "total_steps": 16520, "loss": 0.0304, "lr": 3.590892395188122e-05, "epoch": 8.41404358353511, "percentage": 42.07, "elapsed_time": "0:24:02", "remaining_time": "0:33:06", "throughput": 1973.94, "total_tokens": 2847384}
1399
+ {"current_steps": 6955, "total_steps": 16520, "loss": 0.0001, "lr": 3.5885152715957874e-05, "epoch": 8.420096852300242, "percentage": 42.1, "elapsed_time": "0:24:03", "remaining_time": "0:33:05", "throughput": 1973.98, "total_tokens": 2849368}
1400
+ {"current_steps": 6960, "total_steps": 16520, "loss": 0.0, "lr": 3.5861369330234345e-05, "epoch": 8.426150121065376, "percentage": 42.13, "elapsed_time": "0:24:04", "remaining_time": "0:33:04", "throughput": 1974.03, "total_tokens": 2851384}
1401
+ {"current_steps": 6965, "total_steps": 16520, "loss": 0.0, "lr": 3.583757382125721e-05, "epoch": 8.432203389830509, "percentage": 42.16, "elapsed_time": "0:24:05", "remaining_time": "0:33:02", "throughput": 1974.05, "total_tokens": 2853336}
1402
+ {"current_steps": 6970, "total_steps": 16520, "loss": 0.0716, "lr": 3.5813766215586554e-05, "epoch": 8.438256658595641, "percentage": 42.19, "elapsed_time": "0:24:06", "remaining_time": "0:33:01", "throughput": 1974.08, "total_tokens": 2855352}
1403
+ {"current_steps": 6975, "total_steps": 16520, "loss": 0.0299, "lr": 3.578994653979598e-05, "epoch": 8.444309927360775, "percentage": 42.22, "elapsed_time": "0:24:07", "remaining_time": "0:33:00", "throughput": 1974.14, "total_tokens": 2857368}
1404
+ {"current_steps": 6980, "total_steps": 16520, "loss": 0.0, "lr": 3.576611482047254e-05, "epoch": 8.450363196125908, "percentage": 42.25, "elapsed_time": "0:24:08", "remaining_time": "0:32:59", "throughput": 1974.19, "total_tokens": 2859416}
1405
+ {"current_steps": 6985, "total_steps": 16520, "loss": 0.0003, "lr": 3.574227108421676e-05, "epoch": 8.456416464891042, "percentage": 42.28, "elapsed_time": "0:24:09", "remaining_time": "0:32:58", "throughput": 1974.25, "total_tokens": 2861464}
1406
+ {"current_steps": 6990, "total_steps": 16520, "loss": 0.0018, "lr": 3.5718415357642567e-05, "epoch": 8.462469733656174, "percentage": 42.31, "elapsed_time": "0:24:10", "remaining_time": "0:32:57", "throughput": 1974.37, "total_tokens": 2863608}
1407
+ {"current_steps": 6995, "total_steps": 16520, "loss": 0.0008, "lr": 3.5694547667377256e-05, "epoch": 8.468523002421307, "percentage": 42.34, "elapsed_time": "0:24:11", "remaining_time": "0:32:56", "throughput": 1974.42, "total_tokens": 2865688}
1408
+ {"current_steps": 7000, "total_steps": 16520, "loss": 0.0, "lr": 3.56706680400615e-05, "epoch": 8.474576271186441, "percentage": 42.37, "elapsed_time": "0:24:12", "remaining_time": "0:32:55", "throughput": 1974.44, "total_tokens": 2867640}
1409
+ {"current_steps": 7005, "total_steps": 16520, "loss": 0.0, "lr": 3.5646776502349274e-05, "epoch": 8.480629539951574, "percentage": 42.4, "elapsed_time": "0:24:13", "remaining_time": "0:32:54", "throughput": 1974.47, "total_tokens": 2869624}
1410
+ {"current_steps": 7010, "total_steps": 16520, "loss": 0.0001, "lr": 3.562287308090786e-05, "epoch": 8.486682808716708, "percentage": 42.43, "elapsed_time": "0:24:14", "remaining_time": "0:32:53", "throughput": 1974.58, "total_tokens": 2871768}
1411
+ {"current_steps": 7015, "total_steps": 16520, "loss": 0.0, "lr": 3.559895780241781e-05, "epoch": 8.49273607748184, "percentage": 42.46, "elapsed_time": "0:24:15", "remaining_time": "0:32:51", "throughput": 1974.61, "total_tokens": 2873784}
1412
+ {"current_steps": 7020, "total_steps": 16520, "loss": 0.0, "lr": 3.55750306935729e-05, "epoch": 8.498789346246973, "percentage": 42.49, "elapsed_time": "0:24:16", "remaining_time": "0:32:50", "throughput": 1974.67, "total_tokens": 2875832}
1413
+ {"current_steps": 7025, "total_steps": 16520, "loss": 0.1429, "lr": 3.5551091781080104e-05, "epoch": 8.504842615012107, "percentage": 42.52, "elapsed_time": "0:24:17", "remaining_time": "0:32:49", "throughput": 1974.69, "total_tokens": 2877816}
1414
+ {"current_steps": 7030, "total_steps": 16520, "loss": 0.0025, "lr": 3.552714109165958e-05, "epoch": 8.51089588377724, "percentage": 42.55, "elapsed_time": "0:24:18", "remaining_time": "0:32:48", "throughput": 1974.8, "total_tokens": 2879960}
1415
+ {"current_steps": 7035, "total_steps": 16520, "loss": 0.0003, "lr": 3.550317865204465e-05, "epoch": 8.516949152542374, "percentage": 42.58, "elapsed_time": "0:24:19", "remaining_time": "0:32:47", "throughput": 1974.89, "total_tokens": 2882040}
1416
+ {"current_steps": 7040, "total_steps": 16520, "loss": 0.0004, "lr": 3.547920448898171e-05, "epoch": 8.523002421307506, "percentage": 42.62, "elapsed_time": "0:24:20", "remaining_time": "0:32:46", "throughput": 1975.03, "total_tokens": 2884248}
1417
+ {"current_steps": 7045, "total_steps": 16520, "loss": 0.0133, "lr": 3.545521862923028e-05, "epoch": 8.529055690072639, "percentage": 42.65, "elapsed_time": "0:24:21", "remaining_time": "0:32:45", "throughput": 1975.07, "total_tokens": 2886264}
1418
+ {"current_steps": 7050, "total_steps": 16520, "loss": 0.0001, "lr": 3.5431221099562914e-05, "epoch": 8.535108958837773, "percentage": 42.68, "elapsed_time": "0:24:22", "remaining_time": "0:32:44", "throughput": 1975.1, "total_tokens": 2888248}
1419
+ {"current_steps": 7055, "total_steps": 16520, "loss": 0.005, "lr": 3.540721192676521e-05, "epoch": 8.541162227602905, "percentage": 42.71, "elapsed_time": "0:24:23", "remaining_time": "0:32:43", "throughput": 1975.03, "total_tokens": 2890072}
1420
+ {"current_steps": 7060, "total_steps": 16520, "loss": 0.0002, "lr": 3.538319113763571e-05, "epoch": 8.54721549636804, "percentage": 42.74, "elapsed_time": "0:24:24", "remaining_time": "0:32:42", "throughput": 1975.12, "total_tokens": 2892184}
1421
+ {"current_steps": 7065, "total_steps": 16520, "loss": 0.0002, "lr": 3.535915875898601e-05, "epoch": 8.553268765133172, "percentage": 42.77, "elapsed_time": "0:24:25", "remaining_time": "0:32:40", "throughput": 1975.18, "total_tokens": 2894232}
1422
+ {"current_steps": 7070, "total_steps": 16520, "loss": 0.0001, "lr": 3.533511481764057e-05, "epoch": 8.559322033898304, "percentage": 42.8, "elapsed_time": "0:24:26", "remaining_time": "0:32:39", "throughput": 1975.21, "total_tokens": 2896216}
1423
+ {"current_steps": 7075, "total_steps": 16520, "loss": 0.0001, "lr": 3.531105934043678e-05, "epoch": 8.565375302663439, "percentage": 42.83, "elapsed_time": "0:24:27", "remaining_time": "0:32:38", "throughput": 1975.17, "total_tokens": 2898072}
1424
+ {"current_steps": 7080, "total_steps": 16520, "loss": 0.0001, "lr": 3.5286992354224904e-05, "epoch": 8.571428571428571, "percentage": 42.86, "elapsed_time": "0:24:28", "remaining_time": "0:32:37", "throughput": 1975.19, "total_tokens": 2900056}
1425
+ {"current_steps": 7085, "total_steps": 16520, "loss": 0.0, "lr": 3.5262913885868066e-05, "epoch": 8.577481840193705, "percentage": 42.89, "elapsed_time": "0:24:29", "remaining_time": "0:32:36", "throughput": 1975.24, "total_tokens": 2902072}
1426
+ {"current_steps": 7090, "total_steps": 16520, "loss": 0.0001, "lr": 3.5238823962242176e-05, "epoch": 8.583535108958838, "percentage": 42.92, "elapsed_time": "0:24:30", "remaining_time": "0:32:35", "throughput": 1975.29, "total_tokens": 2904120}
1427
+ {"current_steps": 7095, "total_steps": 16520, "loss": 0.0001, "lr": 3.521472261023596e-05, "epoch": 8.58958837772397, "percentage": 42.95, "elapsed_time": "0:24:31", "remaining_time": "0:32:34", "throughput": 1975.34, "total_tokens": 2906136}
1428
+ {"current_steps": 7100, "total_steps": 16520, "loss": 0.0, "lr": 3.519060985675089e-05, "epoch": 8.595641646489105, "percentage": 42.98, "elapsed_time": "0:24:32", "remaining_time": "0:32:33", "throughput": 1975.36, "total_tokens": 2908120}
1429
+ {"current_steps": 7105, "total_steps": 16520, "loss": 0.0001, "lr": 3.5166485728701145e-05, "epoch": 8.601694915254237, "percentage": 43.01, "elapsed_time": "0:24:33", "remaining_time": "0:32:32", "throughput": 1975.46, "total_tokens": 2910232}
1430
+ {"current_steps": 7110, "total_steps": 16520, "loss": 0.0001, "lr": 3.514235025301365e-05, "epoch": 8.607748184019371, "percentage": 43.04, "elapsed_time": "0:24:34", "remaining_time": "0:32:31", "throughput": 1975.54, "total_tokens": 2912344}
1431
+ {"current_steps": 7115, "total_steps": 16520, "loss": 0.0, "lr": 3.511820345662793e-05, "epoch": 8.613801452784504, "percentage": 43.07, "elapsed_time": "0:24:35", "remaining_time": "0:32:30", "throughput": 1975.6, "total_tokens": 2914424}
1432
+ {"current_steps": 7120, "total_steps": 16520, "loss": 0.0805, "lr": 3.5094045366496184e-05, "epoch": 8.619854721549636, "percentage": 43.1, "elapsed_time": "0:24:36", "remaining_time": "0:32:28", "throughput": 1975.61, "total_tokens": 2916408}
1433
+ {"current_steps": 7125, "total_steps": 16520, "loss": 0.0001, "lr": 3.506987600958324e-05, "epoch": 8.62590799031477, "percentage": 43.13, "elapsed_time": "0:24:37", "remaining_time": "0:32:27", "throughput": 1975.52, "total_tokens": 2918232}
1434
+ {"current_steps": 7130, "total_steps": 16520, "loss": 0.0, "lr": 3.504569541286644e-05, "epoch": 8.631961259079903, "percentage": 43.16, "elapsed_time": "0:24:38", "remaining_time": "0:32:26", "throughput": 1975.54, "total_tokens": 2920248}
1435
+ {"current_steps": 7135, "total_steps": 16520, "loss": 0.0689, "lr": 3.5021503603335725e-05, "epoch": 8.638014527845037, "percentage": 43.19, "elapsed_time": "0:24:39", "remaining_time": "0:32:25", "throughput": 1975.53, "total_tokens": 2922200}
1436
+ {"current_steps": 7140, "total_steps": 16520, "loss": 0.0002, "lr": 3.499730060799352e-05, "epoch": 8.64406779661017, "percentage": 43.22, "elapsed_time": "0:24:40", "remaining_time": "0:32:24", "throughput": 1975.6, "total_tokens": 2924280}
1437
+ {"current_steps": 7145, "total_steps": 16520, "loss": 0.0, "lr": 3.497308645385476e-05, "epoch": 8.650121065375302, "percentage": 43.25, "elapsed_time": "0:24:41", "remaining_time": "0:32:23", "throughput": 1975.67, "total_tokens": 2926360}
1438
+ {"current_steps": 7150, "total_steps": 16520, "loss": 0.0001, "lr": 3.494886116794683e-05, "epoch": 8.656174334140436, "percentage": 43.28, "elapsed_time": "0:24:42", "remaining_time": "0:32:22", "throughput": 1975.78, "total_tokens": 2928504}
1439
+ {"current_steps": 7155, "total_steps": 16520, "loss": 0.0, "lr": 3.4924624777309505e-05, "epoch": 8.662227602905569, "percentage": 43.31, "elapsed_time": "0:24:43", "remaining_time": "0:32:21", "throughput": 1975.78, "total_tokens": 2930488}
1440
+ {"current_steps": 7160, "total_steps": 16520, "loss": 0.0, "lr": 3.490037730899501e-05, "epoch": 8.668280871670703, "percentage": 43.34, "elapsed_time": "0:24:44", "remaining_time": "0:32:20", "throughput": 1975.88, "total_tokens": 2932664}
1441
+ {"current_steps": 7165, "total_steps": 16520, "loss": 0.0001, "lr": 3.4876118790067887e-05, "epoch": 8.674334140435835, "percentage": 43.37, "elapsed_time": "0:24:45", "remaining_time": "0:32:19", "throughput": 1976.0, "total_tokens": 2934840}
1442
+ {"current_steps": 7170, "total_steps": 16520, "loss": 0.0, "lr": 3.485184924760504e-05, "epoch": 8.680387409200968, "percentage": 43.4, "elapsed_time": "0:24:46", "remaining_time": "0:32:18", "throughput": 1976.02, "total_tokens": 2936824}
1443
+ {"current_steps": 7175, "total_steps": 16520, "loss": 0.0886, "lr": 3.482756870869568e-05, "epoch": 8.686440677966102, "percentage": 43.43, "elapsed_time": "0:24:47", "remaining_time": "0:32:17", "throughput": 1976.1, "total_tokens": 2938936}
1444
+ {"current_steps": 7180, "total_steps": 16520, "loss": 0.0, "lr": 3.4803277200441256e-05, "epoch": 8.692493946731235, "percentage": 43.46, "elapsed_time": "0:24:48", "remaining_time": "0:32:15", "throughput": 1976.12, "total_tokens": 2940920}
1445
+ {"current_steps": 7185, "total_steps": 16520, "loss": 0.0001, "lr": 3.477897474995552e-05, "epoch": 8.698547215496369, "percentage": 43.49, "elapsed_time": "0:24:49", "remaining_time": "0:32:14", "throughput": 1976.18, "total_tokens": 2942968}
1446
+ {"current_steps": 7190, "total_steps": 16520, "loss": 0.0128, "lr": 3.475466138436438e-05, "epoch": 8.704600484261501, "percentage": 43.52, "elapsed_time": "0:24:50", "remaining_time": "0:32:13", "throughput": 1976.24, "total_tokens": 2945016}
1447
+ {"current_steps": 7195, "total_steps": 16520, "loss": 0.0022, "lr": 3.473033713080597e-05, "epoch": 8.710653753026634, "percentage": 43.55, "elapsed_time": "0:24:51", "remaining_time": "0:32:12", "throughput": 1976.28, "total_tokens": 2947032}
1448
+ {"current_steps": 7200, "total_steps": 16520, "loss": 0.0, "lr": 3.4706002016430543e-05, "epoch": 8.716707021791768, "percentage": 43.58, "elapsed_time": "0:24:52", "remaining_time": "0:32:11", "throughput": 1976.35, "total_tokens": 2949080}
1449
+ {"current_steps": 7205, "total_steps": 16520, "loss": 0.0645, "lr": 3.4681656068400496e-05, "epoch": 8.7227602905569, "percentage": 43.61, "elapsed_time": "0:24:53", "remaining_time": "0:32:10", "throughput": 1976.46, "total_tokens": 2951224}
1450
+ {"current_steps": 7210, "total_steps": 16520, "loss": 0.1429, "lr": 3.465729931389032e-05, "epoch": 8.728813559322035, "percentage": 43.64, "elapsed_time": "0:24:54", "remaining_time": "0:32:09", "throughput": 1976.49, "total_tokens": 2953208}
1451
+ {"current_steps": 7215, "total_steps": 16520, "loss": 0.0001, "lr": 3.463293178008655e-05, "epoch": 8.734866828087167, "percentage": 43.67, "elapsed_time": "0:24:55", "remaining_time": "0:32:08", "throughput": 1976.6, "total_tokens": 2955352}
1452
+ {"current_steps": 7220, "total_steps": 16520, "loss": 0.0003, "lr": 3.460855349418776e-05, "epoch": 8.7409200968523, "percentage": 43.7, "elapsed_time": "0:24:56", "remaining_time": "0:32:07", "throughput": 1976.71, "total_tokens": 2957496}
1453
+ {"current_steps": 7225, "total_steps": 16520, "loss": 0.0005, "lr": 3.4584164483404544e-05, "epoch": 8.746973365617434, "percentage": 43.73, "elapsed_time": "0:24:57", "remaining_time": "0:32:06", "throughput": 1976.75, "total_tokens": 2959512}
1454
+ {"current_steps": 7230, "total_steps": 16520, "loss": 0.0005, "lr": 3.455976477495944e-05, "epoch": 8.753026634382566, "percentage": 43.77, "elapsed_time": "0:24:58", "remaining_time": "0:32:05", "throughput": 1976.73, "total_tokens": 2961432}
1455
+ {"current_steps": 7235, "total_steps": 16520, "loss": 0.0001, "lr": 3.453535439608694e-05, "epoch": 8.7590799031477, "percentage": 43.8, "elapsed_time": "0:24:59", "remaining_time": "0:32:03", "throughput": 1976.79, "total_tokens": 2963480}
1456
+ {"current_steps": 7240, "total_steps": 16520, "loss": 0.0003, "lr": 3.4510933374033445e-05, "epoch": 8.765133171912833, "percentage": 43.83, "elapsed_time": "0:25:00", "remaining_time": "0:32:02", "throughput": 1976.85, "total_tokens": 2965528}
1457
+ {"current_steps": 7245, "total_steps": 16520, "loss": 0.0, "lr": 3.448650173605723e-05, "epoch": 8.771186440677965, "percentage": 43.86, "elapsed_time": "0:25:01", "remaining_time": "0:32:01", "throughput": 1976.85, "total_tokens": 2967480}
1458
+ {"current_steps": 7250, "total_steps": 16520, "loss": 0.0001, "lr": 3.4462059509428435e-05, "epoch": 8.7772397094431, "percentage": 43.89, "elapsed_time": "0:25:02", "remaining_time": "0:32:00", "throughput": 1976.96, "total_tokens": 2969624}
1459
+ {"current_steps": 7255, "total_steps": 16520, "loss": 0.0003, "lr": 3.443760672142901e-05, "epoch": 8.783292978208232, "percentage": 43.92, "elapsed_time": "0:25:03", "remaining_time": "0:31:59", "throughput": 1977.08, "total_tokens": 2971800}
1460
+ {"current_steps": 7260, "total_steps": 16520, "loss": 0.0129, "lr": 3.441314339935266e-05, "epoch": 8.789346246973366, "percentage": 43.95, "elapsed_time": "0:25:04", "remaining_time": "0:31:58", "throughput": 1977.14, "total_tokens": 2973848}
1461
+ {"current_steps": 7265, "total_steps": 16520, "loss": 0.0001, "lr": 3.438866957050492e-05, "epoch": 8.795399515738499, "percentage": 43.98, "elapsed_time": "0:25:05", "remaining_time": "0:31:57", "throughput": 1977.25, "total_tokens": 2975992}
1462
+ {"current_steps": 7270, "total_steps": 16520, "loss": 0.0, "lr": 3.4364185262202984e-05, "epoch": 8.801452784503631, "percentage": 44.01, "elapsed_time": "0:25:06", "remaining_time": "0:31:56", "throughput": 1977.33, "total_tokens": 2978072}
1463
+ {"current_steps": 7275, "total_steps": 16520, "loss": 0.0001, "lr": 3.4339690501775784e-05, "epoch": 8.807506053268765, "percentage": 44.04, "elapsed_time": "0:25:07", "remaining_time": "0:31:55", "throughput": 1977.36, "total_tokens": 2980056}
1464
+ {"current_steps": 7280, "total_steps": 16520, "loss": 0.0007, "lr": 3.43151853165639e-05, "epoch": 8.813559322033898, "percentage": 44.07, "elapsed_time": "0:25:08", "remaining_time": "0:31:54", "throughput": 1977.42, "total_tokens": 2982104}
1465
+ {"current_steps": 7285, "total_steps": 16520, "loss": 0.0001, "lr": 3.429066973391955e-05, "epoch": 8.819612590799032, "percentage": 44.1, "elapsed_time": "0:25:09", "remaining_time": "0:31:52", "throughput": 1977.47, "total_tokens": 2984120}
1466
+ {"current_steps": 7290, "total_steps": 16520, "loss": 0.0005, "lr": 3.426614378120657e-05, "epoch": 8.825665859564165, "percentage": 44.13, "elapsed_time": "0:25:10", "remaining_time": "0:31:51", "throughput": 1977.54, "total_tokens": 2986200}
1467
+ {"current_steps": 7295, "total_steps": 16520, "loss": 0.0, "lr": 3.4241607485800365e-05, "epoch": 8.831719128329297, "percentage": 44.16, "elapsed_time": "0:25:11", "remaining_time": "0:31:50", "throughput": 1977.68, "total_tokens": 2988408}
1468
+ {"current_steps": 7300, "total_steps": 16520, "loss": 0.0, "lr": 3.4217060875087856e-05, "epoch": 8.837772397094431, "percentage": 44.19, "elapsed_time": "0:25:12", "remaining_time": "0:31:49", "throughput": 1977.71, "total_tokens": 2990392}
1469
+ {"current_steps": 7305, "total_steps": 16520, "loss": 0.0001, "lr": 3.419250397646753e-05, "epoch": 8.843825665859564, "percentage": 44.22, "elapsed_time": "0:25:13", "remaining_time": "0:31:48", "throughput": 1977.8, "total_tokens": 2992472}
1470
+ {"current_steps": 7310, "total_steps": 16520, "loss": 0.0, "lr": 3.416793681734932e-05, "epoch": 8.849878934624698, "percentage": 44.25, "elapsed_time": "0:25:14", "remaining_time": "0:31:47", "throughput": 1977.81, "total_tokens": 2994424}
1471
+ {"current_steps": 7315, "total_steps": 16520, "loss": 0.0, "lr": 3.414335942515461e-05, "epoch": 8.85593220338983, "percentage": 44.28, "elapsed_time": "0:25:15", "remaining_time": "0:31:46", "throughput": 1977.76, "total_tokens": 2996312}
1472
+ {"current_steps": 7320, "total_steps": 16520, "loss": 0.0, "lr": 3.411877182731623e-05, "epoch": 8.861985472154963, "percentage": 44.31, "elapsed_time": "0:25:15", "remaining_time": "0:31:45", "throughput": 1977.83, "total_tokens": 2998392}
1473
+ {"current_steps": 7325, "total_steps": 16520, "loss": 0.0488, "lr": 3.409417405127839e-05, "epoch": 8.868038740920097, "percentage": 44.34, "elapsed_time": "0:25:17", "remaining_time": "0:31:44", "throughput": 1977.94, "total_tokens": 3000536}
1474
+ {"current_steps": 7330, "total_steps": 16520, "loss": 0.0001, "lr": 3.406956612449665e-05, "epoch": 8.87409200968523, "percentage": 44.37, "elapsed_time": "0:25:18", "remaining_time": "0:31:43", "throughput": 1978.03, "total_tokens": 3002648}
1475
+ {"current_steps": 7335, "total_steps": 16520, "loss": 0.0, "lr": 3.404494807443791e-05, "epoch": 8.880145278450364, "percentage": 44.4, "elapsed_time": "0:25:19", "remaining_time": "0:31:42", "throughput": 1978.14, "total_tokens": 3004792}
1476
+ {"current_steps": 7340, "total_steps": 16520, "loss": 0.1327, "lr": 3.402031992858037e-05, "epoch": 8.886198547215496, "percentage": 44.43, "elapsed_time": "0:25:20", "remaining_time": "0:31:41", "throughput": 1978.25, "total_tokens": 3006936}
1477
+ {"current_steps": 7345, "total_steps": 16520, "loss": 0.0005, "lr": 3.3995681714413505e-05, "epoch": 8.892251815980629, "percentage": 44.46, "elapsed_time": "0:25:20", "remaining_time": "0:31:39", "throughput": 1978.22, "total_tokens": 3008824}
1478
+ {"current_steps": 7350, "total_steps": 16520, "loss": 0.0012, "lr": 3.397103345943802e-05, "epoch": 8.898305084745763, "percentage": 44.49, "elapsed_time": "0:25:21", "remaining_time": "0:31:38", "throughput": 1978.33, "total_tokens": 3011000}
1479
+ {"current_steps": 7355, "total_steps": 16520, "loss": 0.0004, "lr": 3.394637519116581e-05, "epoch": 8.904358353510895, "percentage": 44.52, "elapsed_time": "0:25:22", "remaining_time": "0:31:37", "throughput": 1978.4, "total_tokens": 3013080}
1480
+ {"current_steps": 7360, "total_steps": 16520, "loss": 0.0002, "lr": 3.3921706937119984e-05, "epoch": 8.91041162227603, "percentage": 44.55, "elapsed_time": "0:25:23", "remaining_time": "0:31:36", "throughput": 1978.4, "total_tokens": 3015032}
1481
+ {"current_steps": 7365, "total_steps": 16520, "loss": 0.0002, "lr": 3.389702872483477e-05, "epoch": 8.916464891041162, "percentage": 44.58, "elapsed_time": "0:25:24", "remaining_time": "0:31:35", "throughput": 1978.46, "total_tokens": 3017080}
1482
+ {"current_steps": 7370, "total_steps": 16520, "loss": 0.0001, "lr": 3.387234058185553e-05, "epoch": 8.922518159806295, "percentage": 44.61, "elapsed_time": "0:25:25", "remaining_time": "0:31:34", "throughput": 1978.58, "total_tokens": 3019256}
1483
+ {"current_steps": 7375, "total_steps": 16520, "loss": 0.0001, "lr": 3.3847642535738675e-05, "epoch": 8.928571428571429, "percentage": 44.64, "elapsed_time": "0:25:26", "remaining_time": "0:31:33", "throughput": 1978.62, "total_tokens": 3021272}
1484
+ {"current_steps": 7380, "total_steps": 16520, "loss": 0.0001, "lr": 3.382293461405171e-05, "epoch": 8.934624697336561, "percentage": 44.67, "elapsed_time": "0:25:27", "remaining_time": "0:31:32", "throughput": 1978.61, "total_tokens": 3023224}
1485
+ {"current_steps": 7385, "total_steps": 16520, "loss": 0.0558, "lr": 3.379821684437314e-05, "epoch": 8.940677966101696, "percentage": 44.7, "elapsed_time": "0:25:28", "remaining_time": "0:31:31", "throughput": 1978.65, "total_tokens": 3025272}
1486
+ {"current_steps": 7390, "total_steps": 16520, "loss": 0.0001, "lr": 3.377348925429249e-05, "epoch": 8.946731234866828, "percentage": 44.73, "elapsed_time": "0:25:29", "remaining_time": "0:31:30", "throughput": 1978.68, "total_tokens": 3027256}
1487
+ {"current_steps": 7395, "total_steps": 16520, "loss": 0.0001, "lr": 3.3748751871410193e-05, "epoch": 8.95278450363196, "percentage": 44.76, "elapsed_time": "0:25:30", "remaining_time": "0:31:29", "throughput": 1978.72, "total_tokens": 3029272}
1488
+ {"current_steps": 7400, "total_steps": 16520, "loss": 0.0002, "lr": 3.372400472333765e-05, "epoch": 8.958837772397095, "percentage": 44.79, "elapsed_time": "0:25:31", "remaining_time": "0:31:27", "throughput": 1978.8, "total_tokens": 3031352}
1489
+ {"current_steps": 7405, "total_steps": 16520, "loss": 0.0001, "lr": 3.369924783769719e-05, "epoch": 8.964891041162227, "percentage": 44.82, "elapsed_time": "0:25:32", "remaining_time": "0:31:26", "throughput": 1978.85, "total_tokens": 3033368}
1490
+ {"current_steps": 7410, "total_steps": 16520, "loss": 0.0001, "lr": 3.3674481242121934e-05, "epoch": 8.970944309927361, "percentage": 44.85, "elapsed_time": "0:25:33", "remaining_time": "0:31:25", "throughput": 1978.9, "total_tokens": 3035416}
1491
+ {"current_steps": 7415, "total_steps": 16520, "loss": 0.0, "lr": 3.364970496425591e-05, "epoch": 8.976997578692494, "percentage": 44.88, "elapsed_time": "0:25:34", "remaining_time": "0:31:24", "throughput": 1978.98, "total_tokens": 3037496}
1492
+ {"current_steps": 7420, "total_steps": 16520, "loss": 0.0001, "lr": 3.362491903175392e-05, "epoch": 8.983050847457626, "percentage": 44.92, "elapsed_time": "0:25:35", "remaining_time": "0:31:23", "throughput": 1979.07, "total_tokens": 3039608}
1493
+ {"current_steps": 7425, "total_steps": 16520, "loss": 0.0001, "lr": 3.360012347228155e-05, "epoch": 8.98910411622276, "percentage": 44.95, "elapsed_time": "0:25:36", "remaining_time": "0:31:22", "throughput": 1979.09, "total_tokens": 3041592}
1494
+ {"current_steps": 7430, "total_steps": 16520, "loss": 0.0006, "lr": 3.357531831351514e-05, "epoch": 8.995157384987893, "percentage": 44.98, "elapsed_time": "0:25:37", "remaining_time": "0:31:21", "throughput": 1979.16, "total_tokens": 3043640}
1495
+ {"current_steps": 7434, "total_steps": 16520, "eval_loss": 0.24652281403541565, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:25:43", "remaining_time": "0:31:27", "throughput": 1972.19, "total_tokens": 3044984}
1496
+ {"current_steps": 7435, "total_steps": 16520, "loss": 0.0001, "lr": 3.355050358314172e-05, "epoch": 9.001210653753027, "percentage": 45.01, "elapsed_time": "0:25:45", "remaining_time": "0:31:28", "throughput": 1970.04, "total_tokens": 3045368}
1497
+ {"current_steps": 7440, "total_steps": 16520, "loss": 0.0, "lr": 3.352567930885902e-05, "epoch": 9.00726392251816, "percentage": 45.04, "elapsed_time": "0:25:46", "remaining_time": "0:31:27", "throughput": 1970.06, "total_tokens": 3047384}
1498
+ {"current_steps": 7445, "total_steps": 16520, "loss": 0.0, "lr": 3.350084551837545e-05, "epoch": 9.013317191283292, "percentage": 45.07, "elapsed_time": "0:25:47", "remaining_time": "0:31:26", "throughput": 1970.04, "total_tokens": 3049304}
1499
+ {"current_steps": 7450, "total_steps": 16520, "loss": 0.0, "lr": 3.347600223940998e-05, "epoch": 9.019370460048426, "percentage": 45.1, "elapsed_time": "0:25:48", "remaining_time": "0:31:25", "throughput": 1970.13, "total_tokens": 3051448}
1500
+ {"current_steps": 7455, "total_steps": 16520, "loss": 0.0544, "lr": 3.345114949969222e-05, "epoch": 9.025423728813559, "percentage": 45.13, "elapsed_time": "0:25:50", "remaining_time": "0:31:25", "throughput": 1969.68, "total_tokens": 3053464}
1501
+ {"current_steps": 7460, "total_steps": 16520, "loss": 0.0, "lr": 3.3426287326962334e-05, "epoch": 9.031476997578693, "percentage": 45.16, "elapsed_time": "0:25:51", "remaining_time": "0:31:23", "throughput": 1969.75, "total_tokens": 3055544}
1502
+ {"current_steps": 7465, "total_steps": 16520, "loss": 0.0011, "lr": 3.3401415748970984e-05, "epoch": 9.037530266343826, "percentage": 45.19, "elapsed_time": "0:25:52", "remaining_time": "0:31:22", "throughput": 1969.78, "total_tokens": 3057560}
1503
+ {"current_steps": 7470, "total_steps": 16520, "loss": 0.0, "lr": 3.337653479347937e-05, "epoch": 9.043583535108958, "percentage": 45.22, "elapsed_time": "0:25:53", "remaining_time": "0:31:21", "throughput": 1969.86, "total_tokens": 3059640}
1504
+ {"current_steps": 7475, "total_steps": 16520, "loss": 0.0, "lr": 3.335164448825913e-05, "epoch": 9.049636803874092, "percentage": 45.25, "elapsed_time": "0:25:54", "remaining_time": "0:31:20", "throughput": 1969.92, "total_tokens": 3061688}