Training in progress, step 8260
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +81 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 26214528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:312bdc882fa360973808b397c1843d01d8ead0275110cad520017a51e3cf7b0e
|
| 3 |
size 26214528
|
trainer_log.jsonl
CHANGED
|
@@ -1590,3 +1590,84 @@
|
|
| 1590 |
{"current_steps": 7855, "total_steps": 8260, "loss": 0.1898, "lr": 3.670718306630766e-07, "epoch": 9.509685230024212, "percentage": 95.1, "elapsed_time": "0:16:12", "remaining_time": "0:00:50", "throughput": 3311.69, "total_tokens": 3221648}
|
| 1591 |
{"current_steps": 7860, "total_steps": 8260, "loss": 0.1619, "lr": 3.5810783919895673e-07, "epoch": 9.515738498789347, "percentage": 95.16, "elapsed_time": "0:16:13", "remaining_time": "0:00:49", "throughput": 3311.94, "total_tokens": 3223632}
|
| 1592 |
{"current_steps": 7865, "total_steps": 8260, "loss": 0.1771, "lr": 3.4925386694723284e-07, "epoch": 9.521791767554479, "percentage": 95.22, "elapsed_time": "0:16:13", "remaining_time": "0:00:48", "throughput": 3312.1, "total_tokens": 3225616}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1590 |
{"current_steps": 7855, "total_steps": 8260, "loss": 0.1898, "lr": 3.670718306630766e-07, "epoch": 9.509685230024212, "percentage": 95.1, "elapsed_time": "0:16:12", "remaining_time": "0:00:50", "throughput": 3311.69, "total_tokens": 3221648}
|
| 1591 |
{"current_steps": 7860, "total_steps": 8260, "loss": 0.1619, "lr": 3.5810783919895673e-07, "epoch": 9.515738498789347, "percentage": 95.16, "elapsed_time": "0:16:13", "remaining_time": "0:00:49", "throughput": 3311.94, "total_tokens": 3223632}
|
| 1592 |
{"current_steps": 7865, "total_steps": 8260, "loss": 0.1771, "lr": 3.4925386694723284e-07, "epoch": 9.521791767554479, "percentage": 95.22, "elapsed_time": "0:16:13", "remaining_time": "0:00:48", "throughput": 3312.1, "total_tokens": 3225616}
|
| 1593 |
+
{"current_steps": 7870, "total_steps": 8260, "loss": 0.1762, "lr": 3.405099534384393e-07, "epoch": 9.527845036319613, "percentage": 95.28, "elapsed_time": "0:16:14", "remaining_time": "0:00:48", "throughput": 3312.38, "total_tokens": 3227728}
|
| 1594 |
+
{"current_steps": 7875, "total_steps": 8260, "loss": 0.1882, "lr": 3.31876137711723e-07, "epoch": 9.533898305084746, "percentage": 95.34, "elapsed_time": "0:16:14", "remaining_time": "0:00:47", "throughput": 3312.58, "total_tokens": 3229744}
|
| 1595 |
+
{"current_steps": 7880, "total_steps": 8260, "loss": 0.2062, "lr": 3.233524583146741e-07, "epoch": 9.539951573849878, "percentage": 95.4, "elapsed_time": "0:16:15", "remaining_time": "0:00:47", "throughput": 3312.86, "total_tokens": 3231664}
|
| 1596 |
+
{"current_steps": 7885, "total_steps": 8260, "loss": 0.1716, "lr": 3.149389533031566e-07, "epoch": 9.546004842615012, "percentage": 95.46, "elapsed_time": "0:16:16", "remaining_time": "0:00:46", "throughput": 3313.1, "total_tokens": 3233712}
|
| 1597 |
+
{"current_steps": 7890, "total_steps": 8260, "loss": 0.162, "lr": 3.066356602411419e-07, "epoch": 9.552058111380145, "percentage": 95.52, "elapsed_time": "0:16:16", "remaining_time": "0:00:45", "throughput": 3313.37, "total_tokens": 3235728}
|
| 1598 |
+
{"current_steps": 7895, "total_steps": 8260, "loss": 0.216, "lr": 2.984426162005227e-07, "epoch": 9.558111380145279, "percentage": 95.58, "elapsed_time": "0:16:17", "remaining_time": "0:00:45", "throughput": 3313.64, "total_tokens": 3237712}
|
| 1599 |
+
{"current_steps": 7900, "total_steps": 8260, "loss": 0.1872, "lr": 2.903598577609717e-07, "epoch": 9.564164648910412, "percentage": 95.64, "elapsed_time": "0:16:17", "remaining_time": "0:00:44", "throughput": 3313.8, "total_tokens": 3239664}
|
| 1600 |
+
{"current_steps": 7905, "total_steps": 8260, "loss": 0.1983, "lr": 2.823874210097638e-07, "epoch": 9.570217917675544, "percentage": 95.7, "elapsed_time": "0:16:18", "remaining_time": "0:00:43", "throughput": 3314.01, "total_tokens": 3241776}
|
| 1601 |
+
{"current_steps": 7910, "total_steps": 8260, "loss": 0.1846, "lr": 2.745253415416177e-07, "epoch": 9.576271186440678, "percentage": 95.76, "elapsed_time": "0:16:18", "remaining_time": "0:00:43", "throughput": 3314.32, "total_tokens": 3243824}
|
| 1602 |
+
{"current_steps": 7915, "total_steps": 8260, "loss": 0.1802, "lr": 2.6677365445852976e-07, "epoch": 9.58232445520581, "percentage": 95.82, "elapsed_time": "0:16:19", "remaining_time": "0:00:42", "throughput": 3314.52, "total_tokens": 3246000}
|
| 1603 |
+
{"current_steps": 7920, "total_steps": 8260, "loss": 0.1698, "lr": 2.5913239436964054e-07, "epoch": 9.588377723970945, "percentage": 95.88, "elapsed_time": "0:16:19", "remaining_time": "0:00:42", "throughput": 3314.82, "total_tokens": 3248112}
|
| 1604 |
+
{"current_steps": 7925, "total_steps": 8260, "loss": 0.195, "lr": 2.5160159539105443e-07, "epoch": 9.594430992736077, "percentage": 95.94, "elapsed_time": "0:16:20", "remaining_time": "0:00:41", "throughput": 3315.11, "total_tokens": 3250256}
|
| 1605 |
+
{"current_steps": 7930, "total_steps": 8260, "loss": 0.1914, "lr": 2.441812911456981e-07, "epoch": 9.600484261501212, "percentage": 96.0, "elapsed_time": "0:16:20", "remaining_time": "0:00:40", "throughput": 3315.41, "total_tokens": 3252368}
|
| 1606 |
+
{"current_steps": 7935, "total_steps": 8260, "loss": 0.1968, "lr": 2.3687151476317337e-07, "epoch": 9.606537530266344, "percentage": 96.07, "elapsed_time": "0:16:21", "remaining_time": "0:00:40", "throughput": 3315.64, "total_tokens": 3254416}
|
| 1607 |
+
{"current_steps": 7940, "total_steps": 8260, "loss": 0.221, "lr": 2.2967229887960186e-07, "epoch": 9.612590799031477, "percentage": 96.13, "elapsed_time": "0:16:22", "remaining_time": "0:00:39", "throughput": 3315.76, "total_tokens": 3256496}
|
| 1608 |
+
{"current_steps": 7945, "total_steps": 8260, "loss": 0.1964, "lr": 2.2258367563748884e-07, "epoch": 9.61864406779661, "percentage": 96.19, "elapsed_time": "0:16:22", "remaining_time": "0:00:38", "throughput": 3316.02, "total_tokens": 3258576}
|
| 1609 |
+
{"current_steps": 7950, "total_steps": 8260, "loss": 0.1939, "lr": 2.1560567668556797e-07, "epoch": 9.624697336561743, "percentage": 96.25, "elapsed_time": "0:16:23", "remaining_time": "0:00:38", "throughput": 3316.2, "total_tokens": 3260496}
|
| 1610 |
+
{"current_steps": 7955, "total_steps": 8260, "loss": 0.1997, "lr": 2.0873833317866798e-07, "epoch": 9.630750605326877, "percentage": 96.31, "elapsed_time": "0:16:23", "remaining_time": "0:00:37", "throughput": 3316.48, "total_tokens": 3262608}
|
| 1611 |
+
{"current_steps": 7960, "total_steps": 8260, "loss": 0.212, "lr": 2.019816757775711e-07, "epoch": 9.63680387409201, "percentage": 96.37, "elapsed_time": "0:16:24", "remaining_time": "0:00:37", "throughput": 3316.72, "total_tokens": 3264592}
|
| 1612 |
+
{"current_steps": 7965, "total_steps": 8260, "loss": 0.1611, "lr": 1.9533573464888543e-07, "epoch": 9.642857142857142, "percentage": 96.43, "elapsed_time": "0:16:24", "remaining_time": "0:00:36", "throughput": 3316.94, "total_tokens": 3266704}
|
| 1613 |
+
{"current_steps": 7970, "total_steps": 8260, "loss": 0.193, "lr": 1.8880053946488675e-07, "epoch": 9.648910411622277, "percentage": 96.49, "elapsed_time": "0:16:25", "remaining_time": "0:00:35", "throughput": 3317.15, "total_tokens": 3268816}
|
| 1614 |
+
{"current_steps": 7975, "total_steps": 8260, "loss": 0.2111, "lr": 1.8237611940341291e-07, "epoch": 9.654963680387409, "percentage": 96.55, "elapsed_time": "0:16:25", "remaining_time": "0:00:35", "throughput": 3317.38, "total_tokens": 3270864}
|
| 1615 |
+
{"current_steps": 7980, "total_steps": 8260, "loss": 0.1854, "lr": 1.760625031477142e-07, "epoch": 9.661016949152543, "percentage": 96.61, "elapsed_time": "0:16:26", "remaining_time": "0:00:34", "throughput": 3317.57, "total_tokens": 3272944}
|
| 1616 |
+
{"current_steps": 7985, "total_steps": 8260, "loss": 0.1695, "lr": 1.6985971888633935e-07, "epoch": 9.667070217917676, "percentage": 96.67, "elapsed_time": "0:16:27", "remaining_time": "0:00:33", "throughput": 3317.8, "total_tokens": 3274992}
|
| 1617 |
+
{"current_steps": 7990, "total_steps": 8260, "loss": 0.1488, "lr": 1.637677943129967e-07, "epoch": 9.673123486682808, "percentage": 96.73, "elapsed_time": "0:16:27", "remaining_time": "0:00:33", "throughput": 3318.01, "total_tokens": 3277008}
|
| 1618 |
+
{"current_steps": 7995, "total_steps": 8260, "loss": 0.2143, "lr": 1.5778675662643793e-07, "epoch": 9.679176755447942, "percentage": 96.79, "elapsed_time": "0:16:28", "remaining_time": "0:00:32", "throughput": 3318.19, "total_tokens": 3278928}
|
| 1619 |
+
{"current_steps": 8000, "total_steps": 8260, "loss": 0.1922, "lr": 1.5191663253034116e-07, "epoch": 9.685230024213075, "percentage": 96.85, "elapsed_time": "0:16:28", "remaining_time": "0:00:32", "throughput": 3318.39, "total_tokens": 3280944}
|
| 1620 |
+
{"current_steps": 8005, "total_steps": 8260, "loss": 0.2114, "lr": 1.461574482331779e-07, "epoch": 9.69128329297821, "percentage": 96.91, "elapsed_time": "0:16:29", "remaining_time": "0:00:31", "throughput": 3318.53, "total_tokens": 3282960}
|
| 1621 |
+
{"current_steps": 8010, "total_steps": 8260, "loss": 0.1732, "lr": 1.4050922944811305e-07, "epoch": 9.697336561743342, "percentage": 96.97, "elapsed_time": "0:16:29", "remaining_time": "0:00:30", "throughput": 3318.74, "total_tokens": 3285008}
|
| 1622 |
+
{"current_steps": 8015, "total_steps": 8260, "loss": 0.2237, "lr": 1.349720013928718e-07, "epoch": 9.703389830508474, "percentage": 97.03, "elapsed_time": "0:16:30", "remaining_time": "0:00:30", "throughput": 3319.0, "total_tokens": 3287088}
|
| 1623 |
+
{"current_steps": 8020, "total_steps": 8260, "loss": 0.1781, "lr": 1.2954578878964507e-07, "epoch": 9.709443099273608, "percentage": 97.09, "elapsed_time": "0:16:30", "remaining_time": "0:00:29", "throughput": 3319.24, "total_tokens": 3289168}
|
| 1624 |
+
{"current_steps": 8025, "total_steps": 8260, "loss": 0.2143, "lr": 1.2423061586496477e-07, "epoch": 9.71549636803874, "percentage": 97.15, "elapsed_time": "0:16:31", "remaining_time": "0:00:29", "throughput": 3319.47, "total_tokens": 3291376}
|
| 1625 |
+
{"current_steps": 8030, "total_steps": 8260, "loss": 0.1804, "lr": 1.1902650634960378e-07, "epoch": 9.721549636803875, "percentage": 97.22, "elapsed_time": "0:16:32", "remaining_time": "0:00:28", "throughput": 3319.72, "total_tokens": 3293360}
|
| 1626 |
+
{"current_steps": 8035, "total_steps": 8260, "loss": 0.1749, "lr": 1.1393348347846777e-07, "epoch": 9.727602905569007, "percentage": 97.28, "elapsed_time": "0:16:32", "remaining_time": "0:00:27", "throughput": 3319.9, "total_tokens": 3295344}
|
| 1627 |
+
{"current_steps": 8040, "total_steps": 8260, "loss": 0.1736, "lr": 1.0895156999048972e-07, "epoch": 9.73365617433414, "percentage": 97.34, "elapsed_time": "0:16:33", "remaining_time": "0:00:27", "throughput": 3320.12, "total_tokens": 3297392}
|
| 1628 |
+
{"current_steps": 8045, "total_steps": 8260, "loss": 0.204, "lr": 1.0408078812853273e-07, "epoch": 9.739709443099274, "percentage": 97.4, "elapsed_time": "0:16:33", "remaining_time": "0:00:26", "throughput": 3320.38, "total_tokens": 3299376}
|
| 1629 |
+
{"current_steps": 8050, "total_steps": 8260, "loss": 0.1752, "lr": 9.932115963928734e-08, "epoch": 9.745762711864407, "percentage": 97.46, "elapsed_time": "0:16:34", "remaining_time": "0:00:25", "throughput": 3320.7, "total_tokens": 3301360}
|
| 1630 |
+
{"current_steps": 8055, "total_steps": 8260, "loss": 0.2065, "lr": 9.467270577317167e-08, "epoch": 9.75181598062954, "percentage": 97.52, "elapsed_time": "0:16:34", "remaining_time": "0:00:25", "throughput": 3320.96, "total_tokens": 3303440}
|
| 1631 |
+
{"current_steps": 8060, "total_steps": 8260, "loss": 0.1783, "lr": 9.013544728424528e-08, "epoch": 9.757869249394673, "percentage": 97.58, "elapsed_time": "0:16:35", "remaining_time": "0:00:24", "throughput": 3321.25, "total_tokens": 3305552}
|
| 1632 |
+
{"current_steps": 8065, "total_steps": 8260, "loss": 0.186, "lr": 8.570940443010655e-08, "epoch": 9.763922518159806, "percentage": 97.64, "elapsed_time": "0:16:35", "remaining_time": "0:00:24", "throughput": 3321.51, "total_tokens": 3307728}
|
| 1633 |
+
{"current_steps": 8070, "total_steps": 8260, "loss": 0.2001, "lr": 8.139459697181218e-08, "epoch": 9.76997578692494, "percentage": 97.7, "elapsed_time": "0:16:36", "remaining_time": "0:00:23", "throughput": 3321.74, "total_tokens": 3309776}
|
| 1634 |
+
{"current_steps": 8075, "total_steps": 8260, "loss": 0.1761, "lr": 7.719104417377443e-08, "epoch": 9.776029055690072, "percentage": 97.76, "elapsed_time": "0:16:36", "remaining_time": "0:00:22", "throughput": 3321.99, "total_tokens": 3311760}
|
| 1635 |
+
{"current_steps": 8080, "total_steps": 8260, "loss": 0.1639, "lr": 7.30987648036946e-08, "epoch": 9.782082324455207, "percentage": 97.82, "elapsed_time": "0:16:37", "remaining_time": "0:00:22", "throughput": 3322.2, "total_tokens": 3313808}
|
| 1636 |
+
{"current_steps": 8085, "total_steps": 8260, "loss": 0.1776, "lr": 6.911777713246581e-08, "epoch": 9.788135593220339, "percentage": 97.88, "elapsed_time": "0:16:38", "remaining_time": "0:00:21", "throughput": 3322.39, "total_tokens": 3315888}
|
| 1637 |
+
{"current_steps": 8090, "total_steps": 8260, "loss": 0.1788, "lr": 6.524809893409256e-08, "epoch": 9.794188861985472, "percentage": 97.94, "elapsed_time": "0:16:38", "remaining_time": "0:00:20", "throughput": 3322.6, "total_tokens": 3318000}
|
| 1638 |
+
{"current_steps": 8095, "total_steps": 8260, "loss": 0.1538, "lr": 6.148974748561299e-08, "epoch": 9.800242130750606, "percentage": 98.0, "elapsed_time": "0:16:39", "remaining_time": "0:00:20", "throughput": 3322.8, "total_tokens": 3320016}
|
| 1639 |
+
{"current_steps": 8100, "total_steps": 8260, "loss": 0.188, "lr": 5.784273956702391e-08, "epoch": 9.806295399515738, "percentage": 98.06, "elapsed_time": "0:16:39", "remaining_time": "0:00:19", "throughput": 3322.98, "total_tokens": 3322096}
|
| 1640 |
+
{"current_steps": 8105, "total_steps": 8260, "loss": 0.1457, "lr": 5.4307091461205936e-08, "epoch": 9.812348668280872, "percentage": 98.12, "elapsed_time": "0:16:40", "remaining_time": "0:00:19", "throughput": 3323.15, "total_tokens": 3324176}
|
| 1641 |
+
{"current_steps": 8110, "total_steps": 8260, "loss": 0.179, "lr": 5.08828189538485e-08, "epoch": 9.818401937046005, "percentage": 98.18, "elapsed_time": "0:16:40", "remaining_time": "0:00:18", "throughput": 3323.31, "total_tokens": 3326320}
|
| 1642 |
+
{"current_steps": 8115, "total_steps": 8260, "loss": 0.1771, "lr": 4.7569937333372115e-08, "epoch": 9.824455205811137, "percentage": 98.24, "elapsed_time": "0:16:41", "remaining_time": "0:00:17", "throughput": 3323.55, "total_tokens": 3328464}
|
| 1643 |
+
{"current_steps": 8120, "total_steps": 8260, "loss": 0.1587, "lr": 4.436846139087847e-08, "epoch": 9.830508474576272, "percentage": 98.31, "elapsed_time": "0:16:42", "remaining_time": "0:00:17", "throughput": 3323.75, "total_tokens": 3330480}
|
| 1644 |
+
{"current_steps": 8125, "total_steps": 8260, "loss": 0.2105, "lr": 4.127840542006711e-08, "epoch": 9.836561743341404, "percentage": 98.37, "elapsed_time": "0:16:42", "remaining_time": "0:00:16", "throughput": 3323.99, "total_tokens": 3332624}
|
| 1645 |
+
{"current_steps": 8130, "total_steps": 8260, "loss": 0.155, "lr": 3.829978321718553e-08, "epoch": 9.842615012106538, "percentage": 98.43, "elapsed_time": "0:16:43", "remaining_time": "0:00:16", "throughput": 3324.31, "total_tokens": 3334768}
|
| 1646 |
+
{"current_steps": 8135, "total_steps": 8260, "loss": 0.1654, "lr": 3.543260808095139e-08, "epoch": 9.84866828087167, "percentage": 98.49, "elapsed_time": "0:16:43", "remaining_time": "0:00:15", "throughput": 3324.51, "total_tokens": 3336784}
|
| 1647 |
+
{"current_steps": 8140, "total_steps": 8260, "loss": 0.1586, "lr": 3.267689281250541e-08, "epoch": 9.854721549636803, "percentage": 98.55, "elapsed_time": "0:16:44", "remaining_time": "0:00:14", "throughput": 3324.73, "total_tokens": 3338832}
|
| 1648 |
+
{"current_steps": 8145, "total_steps": 8260, "loss": 0.2214, "lr": 3.003264971535857e-08, "epoch": 9.860774818401937, "percentage": 98.61, "elapsed_time": "0:16:44", "remaining_time": "0:00:14", "throughput": 3324.94, "total_tokens": 3340848}
|
| 1649 |
+
{"current_steps": 8150, "total_steps": 8260, "loss": 0.1815, "lr": 2.7499890595314438e-08, "epoch": 9.86682808716707, "percentage": 98.67, "elapsed_time": "0:16:45", "remaining_time": "0:00:13", "throughput": 3325.13, "total_tokens": 3342960}
|
| 1650 |
+
{"current_steps": 8155, "total_steps": 8260, "loss": 0.1815, "lr": 2.507862676044137e-08, "epoch": 9.872881355932204, "percentage": 98.73, "elapsed_time": "0:16:45", "remaining_time": "0:00:12", "throughput": 3325.37, "total_tokens": 3345104}
|
| 1651 |
+
{"current_steps": 8160, "total_steps": 8260, "loss": 0.1746, "lr": 2.2768869021014274e-08, "epoch": 9.878934624697337, "percentage": 98.79, "elapsed_time": "0:16:46", "remaining_time": "0:00:12", "throughput": 3325.56, "total_tokens": 3347024}
|
| 1652 |
+
{"current_steps": 8165, "total_steps": 8260, "loss": 0.1641, "lr": 2.0570627689459054e-08, "epoch": 9.884987893462469, "percentage": 98.85, "elapsed_time": "0:16:47", "remaining_time": "0:00:11", "throughput": 3325.79, "total_tokens": 3349200}
|
| 1653 |
+
{"current_steps": 8170, "total_steps": 8260, "loss": 0.1776, "lr": 1.848391258031379e-08, "epoch": 9.891041162227603, "percentage": 98.91, "elapsed_time": "0:16:47", "remaining_time": "0:00:11", "throughput": 3326.07, "total_tokens": 3351248}
|
| 1654 |
+
{"current_steps": 8175, "total_steps": 8260, "loss": 0.1944, "lr": 1.6508733010184297e-08, "epoch": 9.897094430992736, "percentage": 98.97, "elapsed_time": "0:16:48", "remaining_time": "0:00:10", "throughput": 3326.31, "total_tokens": 3353488}
|
| 1655 |
+
{"current_steps": 8180, "total_steps": 8260, "loss": 0.148, "lr": 1.4645097797694186e-08, "epoch": 9.90314769975787, "percentage": 99.03, "elapsed_time": "0:16:48", "remaining_time": "0:00:09", "throughput": 3326.59, "total_tokens": 3355440}
|
| 1656 |
+
{"current_steps": 8185, "total_steps": 8260, "loss": 0.2169, "lr": 1.2893015263459874e-08, "epoch": 9.909200968523002, "percentage": 99.09, "elapsed_time": "0:16:49", "remaining_time": "0:00:09", "throughput": 3326.79, "total_tokens": 3357296}
|
| 1657 |
+
{"current_steps": 8190, "total_steps": 8260, "loss": 0.1789, "lr": 1.125249323004618e-08, "epoch": 9.915254237288135, "percentage": 99.15, "elapsed_time": "0:16:49", "remaining_time": "0:00:08", "throughput": 3327.04, "total_tokens": 3359280}
|
| 1658 |
+
{"current_steps": 8195, "total_steps": 8260, "loss": 0.1882, "lr": 9.723539021927463e-09, "epoch": 9.92130750605327, "percentage": 99.21, "elapsed_time": "0:16:50", "remaining_time": "0:00:08", "throughput": 3327.33, "total_tokens": 3361328}
|
| 1659 |
+
{"current_steps": 8200, "total_steps": 8260, "loss": 0.1793, "lr": 8.306159465459872e-09, "epoch": 9.927360774818402, "percentage": 99.27, "elapsed_time": "0:16:50", "remaining_time": "0:00:07", "throughput": 3327.53, "total_tokens": 3363344}
|
| 1660 |
+
{"current_steps": 8205, "total_steps": 8260, "loss": 0.178, "lr": 7.00036088885081e-09, "epoch": 9.933414043583536, "percentage": 99.33, "elapsed_time": "0:16:51", "remaining_time": "0:00:06", "throughput": 3327.75, "total_tokens": 3365296}
|
| 1661 |
+
{"current_steps": 8210, "total_steps": 8260, "loss": 0.1711, "lr": 5.806149122128401e-09, "epoch": 9.939467312348668, "percentage": 99.39, "elapsed_time": "0:16:51", "remaining_time": "0:00:06", "throughput": 3327.97, "total_tokens": 3367504}
|
| 1662 |
+
{"current_steps": 8215, "total_steps": 8260, "loss": 0.2003, "lr": 4.723529497113743e-09, "epoch": 9.9455205811138, "percentage": 99.46, "elapsed_time": "0:16:52", "remaining_time": "0:00:05", "throughput": 3328.18, "total_tokens": 3369616}
|
| 1663 |
+
{"current_steps": 8220, "total_steps": 8260, "loss": 0.1774, "lr": 3.752506847407023e-09, "epoch": 9.951573849878935, "percentage": 99.52, "elapsed_time": "0:16:53", "remaining_time": "0:00:04", "throughput": 3328.38, "total_tokens": 3371728}
|
| 1664 |
+
{"current_steps": 8225, "total_steps": 8260, "loss": 0.2027, "lr": 2.8930855083542096e-09, "epoch": 9.957627118644067, "percentage": 99.58, "elapsed_time": "0:16:53", "remaining_time": "0:00:04", "throughput": 3328.56, "total_tokens": 3373648}
|
| 1665 |
+
{"current_steps": 8230, "total_steps": 8260, "loss": 0.1772, "lr": 2.145269317033183e-09, "epoch": 9.963680387409202, "percentage": 99.64, "elapsed_time": "0:16:54", "remaining_time": "0:00:03", "throughput": 3328.76, "total_tokens": 3375664}
|
| 1666 |
+
{"current_steps": 8235, "total_steps": 8260, "loss": 0.1493, "lr": 1.509061612234297e-09, "epoch": 9.969733656174334, "percentage": 99.7, "elapsed_time": "0:16:54", "remaining_time": "0:00:03", "throughput": 3328.98, "total_tokens": 3377808}
|
| 1667 |
+
{"current_steps": 8240, "total_steps": 8260, "loss": 0.1874, "lr": 9.844652344492832e-10, "epoch": 9.975786924939467, "percentage": 99.76, "elapsed_time": "0:16:55", "remaining_time": "0:00:02", "throughput": 3329.2, "total_tokens": 3379888}
|
| 1668 |
+
{"current_steps": 8245, "total_steps": 8260, "loss": 0.2404, "lr": 5.714825258545942e-10, "epoch": 9.9818401937046, "percentage": 99.82, "elapsed_time": "0:16:55", "remaining_time": "0:00:01", "throughput": 3329.48, "total_tokens": 3382064}
|
| 1669 |
+
{"current_steps": 8250, "total_steps": 8260, "loss": 0.1511, "lr": 2.7011533030585347e-10, "epoch": 9.987893462469733, "percentage": 99.88, "elapsed_time": "0:16:56", "remaining_time": "0:00:01", "throughput": 3329.72, "total_tokens": 3384144}
|
| 1670 |
+
{"current_steps": 8255, "total_steps": 8260, "loss": 0.1651, "lr": 8.036499332397807e-11, "epoch": 9.993946731234868, "percentage": 99.94, "elapsed_time": "0:16:56", "remaining_time": "0:00:00", "throughput": 3329.99, "total_tokens": 3386160}
|
| 1671 |
+
{"current_steps": 8260, "total_steps": 8260, "loss": 0.2114, "lr": 2.2323620896269604e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:16:57", "remaining_time": "0:00:00", "throughput": 3329.75, "total_tokens": 3388032}
|
| 1672 |
+
{"current_steps": 8260, "total_steps": 8260, "eval_loss": 0.18457433581352234, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:02", "remaining_time": "0:00:00", "throughput": 3314.65, "total_tokens": 3388032}
|
| 1673 |
+
{"current_steps": 8260, "total_steps": 8260, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:03", "remaining_time": "0:00:00", "throughput": 3311.36, "total_tokens": 3388032}
|