rbelanec commited on
Commit
c377ef2
·
verified ·
1 Parent(s): 257ad13

Training in progress, step 8260

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +84 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c65471029371ea52c2fd3c688e5935482fc9d2c82263fa74dd7d87f703e84a0
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78862470573cdb636d9f234a01a45aada4cb58552d5ca79e35fc8c9d0cc32b81
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -1587,3 +1587,87 @@
1587
  {"current_steps": 7845, "total_steps": 8260, "loss": 0.2058, "lr": 3.8532971065055045e-07, "epoch": 9.497578692493947, "percentage": 94.98, "elapsed_time": "0:26:47", "remaining_time": "0:01:25", "throughput": 2001.32, "total_tokens": 3217552}
1588
  {"current_steps": 7847, "total_steps": 8260, "eval_loss": 0.19325600564479828, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:26:55", "remaining_time": "0:01:25", "throughput": 1991.84, "total_tokens": 3218352}
1589
  {"current_steps": 7850, "total_steps": 8260, "loss": 0.179, "lr": 3.761458013178648e-07, "epoch": 9.50363196125908, "percentage": 95.04, "elapsed_time": "0:26:57", "remaining_time": "0:01:24", "throughput": 1990.92, "total_tokens": 3219664}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1587
  {"current_steps": 7845, "total_steps": 8260, "loss": 0.2058, "lr": 3.8532971065055045e-07, "epoch": 9.497578692493947, "percentage": 94.98, "elapsed_time": "0:26:47", "remaining_time": "0:01:25", "throughput": 2001.32, "total_tokens": 3217552}
1588
  {"current_steps": 7847, "total_steps": 8260, "eval_loss": 0.19325600564479828, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:26:55", "remaining_time": "0:01:25", "throughput": 1991.84, "total_tokens": 3218352}
1589
  {"current_steps": 7850, "total_steps": 8260, "loss": 0.179, "lr": 3.761458013178648e-07, "epoch": 9.50363196125908, "percentage": 95.04, "elapsed_time": "0:26:57", "remaining_time": "0:01:24", "throughput": 1990.92, "total_tokens": 3219664}
1590
+ {"current_steps": 7855, "total_steps": 8260, "loss": 0.1722, "lr": 3.670718306630766e-07, "epoch": 9.509685230024212, "percentage": 95.1, "elapsed_time": "0:26:58", "remaining_time": "0:01:23", "throughput": 1991.03, "total_tokens": 3221648}
1591
+ {"current_steps": 7860, "total_steps": 8260, "loss": 0.1579, "lr": 3.5810783919895673e-07, "epoch": 9.515738498789347, "percentage": 95.16, "elapsed_time": "0:26:59", "remaining_time": "0:01:22", "throughput": 1991.11, "total_tokens": 3223632}
1592
+ {"current_steps": 7865, "total_steps": 8260, "loss": 0.2126, "lr": 3.4925386694723284e-07, "epoch": 9.521791767554479, "percentage": 95.22, "elapsed_time": "0:26:59", "remaining_time": "0:01:21", "throughput": 1991.21, "total_tokens": 3225616}
1593
+ {"current_steps": 7870, "total_steps": 8260, "loss": 0.1998, "lr": 3.405099534384393e-07, "epoch": 9.527845036319613, "percentage": 95.28, "elapsed_time": "0:27:00", "remaining_time": "0:01:20", "throughput": 1991.39, "total_tokens": 3227728}
1594
+ {"current_steps": 7875, "total_steps": 8260, "loss": 0.2009, "lr": 3.31876137711723e-07, "epoch": 9.533898305084746, "percentage": 95.34, "elapsed_time": "0:27:01", "remaining_time": "0:01:19", "throughput": 1991.51, "total_tokens": 3229744}
1595
+ {"current_steps": 7880, "total_steps": 8260, "loss": 0.202, "lr": 3.233524583146741e-07, "epoch": 9.539951573849878, "percentage": 95.4, "elapsed_time": "0:27:02", "remaining_time": "0:01:18", "throughput": 1991.62, "total_tokens": 3231664}
1596
+ {"current_steps": 7885, "total_steps": 8260, "loss": 0.1636, "lr": 3.149389533031566e-07, "epoch": 9.546004842615012, "percentage": 95.46, "elapsed_time": "0:27:03", "remaining_time": "0:01:17", "throughput": 1991.73, "total_tokens": 3233712}
1597
+ {"current_steps": 7890, "total_steps": 8260, "loss": 0.1716, "lr": 3.066356602411419e-07, "epoch": 9.552058111380145, "percentage": 95.52, "elapsed_time": "0:27:04", "remaining_time": "0:01:16", "throughput": 1991.82, "total_tokens": 3235728}
1598
+ {"current_steps": 7895, "total_steps": 8260, "loss": 0.221, "lr": 2.984426162005227e-07, "epoch": 9.558111380145279, "percentage": 95.58, "elapsed_time": "0:27:05", "remaining_time": "0:01:15", "throughput": 1991.93, "total_tokens": 3237712}
1599
+ {"current_steps": 7900, "total_steps": 8260, "loss": 0.2248, "lr": 2.903598577609717e-07, "epoch": 9.564164648910412, "percentage": 95.64, "elapsed_time": "0:27:06", "remaining_time": "0:01:14", "throughput": 1992.01, "total_tokens": 3239664}
1600
+ {"current_steps": 7905, "total_steps": 8260, "loss": 0.2, "lr": 2.823874210097638e-07, "epoch": 9.570217917675544, "percentage": 95.7, "elapsed_time": "0:27:07", "remaining_time": "0:01:13", "throughput": 1992.16, "total_tokens": 3241776}
1601
+ {"current_steps": 7910, "total_steps": 8260, "loss": 0.2053, "lr": 2.745253415416177e-07, "epoch": 9.576271186440678, "percentage": 95.76, "elapsed_time": "0:27:08", "remaining_time": "0:01:12", "throughput": 1992.27, "total_tokens": 3243824}
1602
+ {"current_steps": 7915, "total_steps": 8260, "loss": 0.1827, "lr": 2.6677365445852976e-07, "epoch": 9.58232445520581, "percentage": 95.82, "elapsed_time": "0:27:09", "remaining_time": "0:01:11", "throughput": 1992.44, "total_tokens": 3246000}
1603
+ {"current_steps": 7920, "total_steps": 8260, "loss": 0.1771, "lr": 2.5913239436964054e-07, "epoch": 9.588377723970945, "percentage": 95.88, "elapsed_time": "0:27:10", "remaining_time": "0:01:09", "throughput": 1992.58, "total_tokens": 3248112}
1604
+ {"current_steps": 7925, "total_steps": 8260, "loss": 0.174, "lr": 2.5160159539105443e-07, "epoch": 9.594430992736077, "percentage": 95.94, "elapsed_time": "0:27:11", "remaining_time": "0:01:08", "throughput": 1992.73, "total_tokens": 3250256}
1605
+ {"current_steps": 7930, "total_steps": 8260, "loss": 0.2233, "lr": 2.441812911456981e-07, "epoch": 9.600484261501212, "percentage": 96.0, "elapsed_time": "0:27:12", "remaining_time": "0:01:07", "throughput": 1992.86, "total_tokens": 3252368}
1606
+ {"current_steps": 7935, "total_steps": 8260, "loss": 0.2179, "lr": 2.3687151476317337e-07, "epoch": 9.606537530266344, "percentage": 96.07, "elapsed_time": "0:27:12", "remaining_time": "0:01:06", "throughput": 1993.0, "total_tokens": 3254416}
1607
+ {"current_steps": 7940, "total_steps": 8260, "loss": 0.2432, "lr": 2.2967229887960186e-07, "epoch": 9.612590799031477, "percentage": 96.13, "elapsed_time": "0:27:13", "remaining_time": "0:01:05", "throughput": 1993.12, "total_tokens": 3256496}
1608
+ {"current_steps": 7945, "total_steps": 8260, "loss": 0.2408, "lr": 2.2258367563748884e-07, "epoch": 9.61864406779661, "percentage": 96.19, "elapsed_time": "0:27:14", "remaining_time": "0:01:04", "throughput": 1993.25, "total_tokens": 3258576}
1609
+ {"current_steps": 7950, "total_steps": 8260, "loss": 0.2212, "lr": 2.1560567668556797e-07, "epoch": 9.624697336561743, "percentage": 96.25, "elapsed_time": "0:27:15", "remaining_time": "0:01:03", "throughput": 1993.35, "total_tokens": 3260496}
1610
+ {"current_steps": 7955, "total_steps": 8260, "loss": 0.2182, "lr": 2.0873833317866798e-07, "epoch": 9.630750605326877, "percentage": 96.31, "elapsed_time": "0:27:16", "remaining_time": "0:01:02", "throughput": 1993.51, "total_tokens": 3262608}
1611
+ {"current_steps": 7960, "total_steps": 8260, "loss": 0.1838, "lr": 2.019816757775711e-07, "epoch": 9.63680387409201, "percentage": 96.37, "elapsed_time": "0:27:17", "remaining_time": "0:01:01", "throughput": 1993.61, "total_tokens": 3264592}
1612
+ {"current_steps": 7965, "total_steps": 8260, "loss": 0.1659, "lr": 1.9533573464888543e-07, "epoch": 9.642857142857142, "percentage": 96.43, "elapsed_time": "0:27:18", "remaining_time": "0:01:00", "throughput": 1993.75, "total_tokens": 3266704}
1613
+ {"current_steps": 7970, "total_steps": 8260, "loss": 0.2012, "lr": 1.8880053946488675e-07, "epoch": 9.648910411622277, "percentage": 96.49, "elapsed_time": "0:27:19", "remaining_time": "0:00:59", "throughput": 1993.89, "total_tokens": 3268816}
1614
+ {"current_steps": 7975, "total_steps": 8260, "loss": 0.2205, "lr": 1.8237611940341291e-07, "epoch": 9.654963680387409, "percentage": 96.55, "elapsed_time": "0:27:20", "remaining_time": "0:00:58", "throughput": 1993.99, "total_tokens": 3270864}
1615
+ {"current_steps": 7980, "total_steps": 8260, "loss": 0.203, "lr": 1.760625031477142e-07, "epoch": 9.661016949152543, "percentage": 96.61, "elapsed_time": "0:27:21", "remaining_time": "0:00:57", "throughput": 1994.11, "total_tokens": 3272944}
1616
+ {"current_steps": 7985, "total_steps": 8260, "loss": 0.1879, "lr": 1.6985971888633935e-07, "epoch": 9.667070217917676, "percentage": 96.67, "elapsed_time": "0:27:22", "remaining_time": "0:00:56", "throughput": 1994.22, "total_tokens": 3274992}
1617
+ {"current_steps": 7990, "total_steps": 8260, "loss": 0.1322, "lr": 1.637677943129967e-07, "epoch": 9.673123486682808, "percentage": 96.73, "elapsed_time": "0:27:23", "remaining_time": "0:00:55", "throughput": 1994.31, "total_tokens": 3277008}
1618
+ {"current_steps": 7995, "total_steps": 8260, "loss": 0.1864, "lr": 1.5778675662643793e-07, "epoch": 9.679176755447942, "percentage": 96.79, "elapsed_time": "0:27:24", "remaining_time": "0:00:54", "throughput": 1994.42, "total_tokens": 3278928}
1619
+ {"current_steps": 8000, "total_steps": 8260, "loss": 0.2052, "lr": 1.5191663253034116e-07, "epoch": 9.685230024213075, "percentage": 96.85, "elapsed_time": "0:27:24", "remaining_time": "0:00:53", "throughput": 1994.54, "total_tokens": 3280944}
1620
+ {"current_steps": 8005, "total_steps": 8260, "loss": 0.2065, "lr": 1.461574482331779e-07, "epoch": 9.69128329297821, "percentage": 96.91, "elapsed_time": "0:27:25", "remaining_time": "0:00:52", "throughput": 1994.66, "total_tokens": 3282960}
1621
+ {"current_steps": 8010, "total_steps": 8260, "loss": 0.1541, "lr": 1.4050922944811305e-07, "epoch": 9.697336561743342, "percentage": 96.97, "elapsed_time": "0:27:26", "remaining_time": "0:00:51", "throughput": 1994.79, "total_tokens": 3285008}
1622
+ {"current_steps": 8015, "total_steps": 8260, "loss": 0.2235, "lr": 1.349720013928718e-07, "epoch": 9.703389830508474, "percentage": 97.03, "elapsed_time": "0:27:27", "remaining_time": "0:00:50", "throughput": 1994.94, "total_tokens": 3287088}
1623
+ {"current_steps": 8020, "total_steps": 8260, "loss": 0.206, "lr": 1.2954578878964507e-07, "epoch": 9.709443099273608, "percentage": 97.09, "elapsed_time": "0:27:28", "remaining_time": "0:00:49", "throughput": 1995.06, "total_tokens": 3289168}
1624
+ {"current_steps": 8025, "total_steps": 8260, "loss": 0.1927, "lr": 1.2423061586496477e-07, "epoch": 9.71549636803874, "percentage": 97.15, "elapsed_time": "0:27:29", "remaining_time": "0:00:48", "throughput": 1995.23, "total_tokens": 3291376}
1625
+ {"current_steps": 8030, "total_steps": 8260, "loss": 0.19, "lr": 1.1902650634960378e-07, "epoch": 9.721549636803875, "percentage": 97.22, "elapsed_time": "0:27:30", "remaining_time": "0:00:47", "throughput": 1995.3, "total_tokens": 3293360}
1626
+ {"current_steps": 8035, "total_steps": 8260, "loss": 0.1966, "lr": 1.1393348347846777e-07, "epoch": 9.727602905569007, "percentage": 97.28, "elapsed_time": "0:27:31", "remaining_time": "0:00:46", "throughput": 1995.44, "total_tokens": 3295344}
1627
+ {"current_steps": 8040, "total_steps": 8260, "loss": 0.1413, "lr": 1.0895156999048972e-07, "epoch": 9.73365617433414, "percentage": 97.34, "elapsed_time": "0:27:32", "remaining_time": "0:00:45", "throughput": 1995.54, "total_tokens": 3297392}
1628
+ {"current_steps": 8045, "total_steps": 8260, "loss": 0.2276, "lr": 1.0408078812853273e-07, "epoch": 9.739709443099274, "percentage": 97.4, "elapsed_time": "0:27:33", "remaining_time": "0:00:44", "throughput": 1995.64, "total_tokens": 3299376}
1629
+ {"current_steps": 8050, "total_steps": 8260, "loss": 0.1753, "lr": 9.932115963928734e-08, "epoch": 9.745762711864407, "percentage": 97.46, "elapsed_time": "0:27:34", "remaining_time": "0:00:43", "throughput": 1995.71, "total_tokens": 3301360}
1630
+ {"current_steps": 8055, "total_steps": 8260, "loss": 0.2308, "lr": 9.467270577317167e-08, "epoch": 9.75181598062954, "percentage": 97.52, "elapsed_time": "0:27:35", "remaining_time": "0:00:42", "throughput": 1995.84, "total_tokens": 3303440}
1631
+ {"current_steps": 8060, "total_steps": 8260, "loss": 0.1754, "lr": 9.013544728424528e-08, "epoch": 9.757869249394673, "percentage": 97.58, "elapsed_time": "0:27:36", "remaining_time": "0:00:41", "throughput": 1995.98, "total_tokens": 3305552}
1632
+ {"current_steps": 8065, "total_steps": 8260, "loss": 0.1779, "lr": 8.570940443010655e-08, "epoch": 9.763922518159806, "percentage": 97.64, "elapsed_time": "0:27:37", "remaining_time": "0:00:40", "throughput": 1996.15, "total_tokens": 3307728}
1633
+ {"current_steps": 8070, "total_steps": 8260, "loss": 0.204, "lr": 8.139459697181218e-08, "epoch": 9.76997578692494, "percentage": 97.7, "elapsed_time": "0:27:37", "remaining_time": "0:00:39", "throughput": 1996.27, "total_tokens": 3309776}
1634
+ {"current_steps": 8075, "total_steps": 8260, "loss": 0.1489, "lr": 7.719104417377443e-08, "epoch": 9.776029055690072, "percentage": 97.76, "elapsed_time": "0:27:38", "remaining_time": "0:00:38", "throughput": 1996.34, "total_tokens": 3311760}
1635
+ {"current_steps": 8080, "total_steps": 8260, "loss": 0.1433, "lr": 7.30987648036946e-08, "epoch": 9.782082324455207, "percentage": 97.82, "elapsed_time": "0:27:39", "remaining_time": "0:00:36", "throughput": 1996.44, "total_tokens": 3313808}
1636
+ {"current_steps": 8085, "total_steps": 8260, "loss": 0.1607, "lr": 6.911777713246581e-08, "epoch": 9.788135593220339, "percentage": 97.88, "elapsed_time": "0:27:40", "remaining_time": "0:00:35", "throughput": 1996.56, "total_tokens": 3315888}
1637
+ {"current_steps": 8090, "total_steps": 8260, "loss": 0.1933, "lr": 6.524809893409256e-08, "epoch": 9.794188861985472, "percentage": 97.94, "elapsed_time": "0:27:41", "remaining_time": "0:00:34", "throughput": 1996.69, "total_tokens": 3318000}
1638
+ {"current_steps": 8095, "total_steps": 8260, "loss": 0.139, "lr": 6.148974748561299e-08, "epoch": 9.800242130750606, "percentage": 98.0, "elapsed_time": "0:27:42", "remaining_time": "0:00:33", "throughput": 1996.81, "total_tokens": 3320016}
1639
+ {"current_steps": 8100, "total_steps": 8260, "loss": 0.1946, "lr": 5.784273956702391e-08, "epoch": 9.806295399515738, "percentage": 98.06, "elapsed_time": "0:27:43", "remaining_time": "0:00:32", "throughput": 1996.93, "total_tokens": 3322096}
1640
+ {"current_steps": 8105, "total_steps": 8260, "loss": 0.1496, "lr": 5.4307091461205936e-08, "epoch": 9.812348668280872, "percentage": 98.12, "elapsed_time": "0:27:44", "remaining_time": "0:00:31", "throughput": 1997.05, "total_tokens": 3324176}
1641
+ {"current_steps": 8110, "total_steps": 8260, "loss": 0.2244, "lr": 5.08828189538485e-08, "epoch": 9.818401937046005, "percentage": 98.18, "elapsed_time": "0:27:45", "remaining_time": "0:00:30", "throughput": 1997.2, "total_tokens": 3326320}
1642
+ {"current_steps": 8115, "total_steps": 8260, "loss": 0.1776, "lr": 4.7569937333372115e-08, "epoch": 9.824455205811137, "percentage": 98.24, "elapsed_time": "0:27:46", "remaining_time": "0:00:29", "throughput": 1997.35, "total_tokens": 3328464}
1643
+ {"current_steps": 8120, "total_steps": 8260, "loss": 0.1348, "lr": 4.436846139087847e-08, "epoch": 9.830508474576272, "percentage": 98.31, "elapsed_time": "0:27:47", "remaining_time": "0:00:28", "throughput": 1997.44, "total_tokens": 3330480}
1644
+ {"current_steps": 8125, "total_steps": 8260, "loss": 0.2058, "lr": 4.127840542006711e-08, "epoch": 9.836561743341404, "percentage": 98.37, "elapsed_time": "0:27:48", "remaining_time": "0:00:27", "throughput": 1997.59, "total_tokens": 3332624}
1645
+ {"current_steps": 8130, "total_steps": 8260, "loss": 0.1619, "lr": 3.829978321718553e-08, "epoch": 9.842615012106538, "percentage": 98.43, "elapsed_time": "0:27:49", "remaining_time": "0:00:26", "throughput": 1997.73, "total_tokens": 3334768}
1646
+ {"current_steps": 8135, "total_steps": 8260, "loss": 0.179, "lr": 3.543260808095139e-08, "epoch": 9.84866828087167, "percentage": 98.49, "elapsed_time": "0:27:50", "remaining_time": "0:00:25", "throughput": 1997.85, "total_tokens": 3336784}
1647
+ {"current_steps": 8140, "total_steps": 8260, "loss": 0.1607, "lr": 3.267689281250541e-08, "epoch": 9.854721549636803, "percentage": 98.55, "elapsed_time": "0:27:51", "remaining_time": "0:00:24", "throughput": 1997.98, "total_tokens": 3338832}
1648
+ {"current_steps": 8145, "total_steps": 8260, "loss": 0.2356, "lr": 3.003264971535857e-08, "epoch": 9.860774818401937, "percentage": 98.61, "elapsed_time": "0:27:52", "remaining_time": "0:00:23", "throughput": 1998.07, "total_tokens": 3340848}
1649
+ {"current_steps": 8150, "total_steps": 8260, "loss": 0.1843, "lr": 2.7499890595314438e-08, "epoch": 9.86682808716707, "percentage": 98.67, "elapsed_time": "0:27:52", "remaining_time": "0:00:22", "throughput": 1998.21, "total_tokens": 3342960}
1650
+ {"current_steps": 8155, "total_steps": 8260, "loss": 0.1827, "lr": 2.507862676044137e-08, "epoch": 9.872881355932204, "percentage": 98.73, "elapsed_time": "0:27:53", "remaining_time": "0:00:21", "throughput": 1998.36, "total_tokens": 3345104}
1651
+ {"current_steps": 8160, "total_steps": 8260, "loss": 0.1745, "lr": 2.2768869021014274e-08, "epoch": 9.878934624697337, "percentage": 98.79, "elapsed_time": "0:27:54", "remaining_time": "0:00:20", "throughput": 1998.46, "total_tokens": 3347024}
1652
+ {"current_steps": 8165, "total_steps": 8260, "loss": 0.2058, "lr": 2.0570627689459054e-08, "epoch": 9.884987893462469, "percentage": 98.85, "elapsed_time": "0:27:55", "remaining_time": "0:00:19", "throughput": 1998.65, "total_tokens": 3349200}
1653
+ {"current_steps": 8170, "total_steps": 8260, "loss": 0.1791, "lr": 1.848391258031379e-08, "epoch": 9.891041162227603, "percentage": 98.91, "elapsed_time": "0:27:56", "remaining_time": "0:00:18", "throughput": 1998.76, "total_tokens": 3351248}
1654
+ {"current_steps": 8175, "total_steps": 8260, "loss": 0.1861, "lr": 1.6508733010184297e-08, "epoch": 9.897094430992736, "percentage": 98.97, "elapsed_time": "0:27:57", "remaining_time": "0:00:17", "throughput": 1998.95, "total_tokens": 3353488}
1655
+ {"current_steps": 8180, "total_steps": 8260, "loss": 0.1719, "lr": 1.4645097797694186e-08, "epoch": 9.90314769975787, "percentage": 99.03, "elapsed_time": "0:27:58", "remaining_time": "0:00:16", "throughput": 1999.01, "total_tokens": 3355440}
1656
+ {"current_steps": 8185, "total_steps": 8260, "loss": 0.2699, "lr": 1.2893015263459874e-08, "epoch": 9.909200968523002, "percentage": 99.09, "elapsed_time": "0:27:59", "remaining_time": "0:00:15", "throughput": 1999.1, "total_tokens": 3357296}
1657
+ {"current_steps": 8190, "total_steps": 8260, "loss": 0.2053, "lr": 1.125249323004618e-08, "epoch": 9.915254237288135, "percentage": 99.15, "elapsed_time": "0:28:00", "remaining_time": "0:00:14", "throughput": 1999.17, "total_tokens": 3359280}
1658
+ {"current_steps": 8195, "total_steps": 8260, "loss": 0.2096, "lr": 9.723539021927463e-09, "epoch": 9.92130750605327, "percentage": 99.21, "elapsed_time": "0:28:01", "remaining_time": "0:00:13", "throughput": 1999.31, "total_tokens": 3361328}
1659
+ {"current_steps": 8200, "total_steps": 8260, "loss": 0.1803, "lr": 8.306159465459872e-09, "epoch": 9.927360774818402, "percentage": 99.27, "elapsed_time": "0:28:02", "remaining_time": "0:00:12", "throughput": 1999.39, "total_tokens": 3363344}
1660
+ {"current_steps": 8205, "total_steps": 8260, "loss": 0.1881, "lr": 7.00036088885081e-09, "epoch": 9.933414043583536, "percentage": 99.33, "elapsed_time": "0:28:03", "remaining_time": "0:00:11", "throughput": 1999.47, "total_tokens": 3365296}
1661
+ {"current_steps": 8210, "total_steps": 8260, "loss": 0.1482, "lr": 5.806149122128401e-09, "epoch": 9.939467312348668, "percentage": 99.39, "elapsed_time": "0:28:04", "remaining_time": "0:00:10", "throughput": 1999.65, "total_tokens": 3367504}
1662
+ {"current_steps": 8215, "total_steps": 8260, "loss": 0.2161, "lr": 4.723529497113743e-09, "epoch": 9.9455205811138, "percentage": 99.46, "elapsed_time": "0:28:04", "remaining_time": "0:00:09", "throughput": 1999.78, "total_tokens": 3369616}
1663
+ {"current_steps": 8220, "total_steps": 8260, "loss": 0.1843, "lr": 3.752506847407023e-09, "epoch": 9.951573849878935, "percentage": 99.52, "elapsed_time": "0:28:05", "remaining_time": "0:00:08", "throughput": 1999.92, "total_tokens": 3371728}
1664
+ {"current_steps": 8225, "total_steps": 8260, "loss": 0.2074, "lr": 2.8930855083542096e-09, "epoch": 9.957627118644067, "percentage": 99.58, "elapsed_time": "0:28:06", "remaining_time": "0:00:07", "throughput": 2000.01, "total_tokens": 3373648}
1665
+ {"current_steps": 8230, "total_steps": 8260, "loss": 0.1793, "lr": 2.145269317033183e-09, "epoch": 9.963680387409202, "percentage": 99.64, "elapsed_time": "0:28:07", "remaining_time": "0:00:06", "throughput": 2000.13, "total_tokens": 3375664}
1666
+ {"current_steps": 8235, "total_steps": 8260, "loss": 0.1531, "lr": 1.509061612234297e-09, "epoch": 9.969733656174334, "percentage": 99.7, "elapsed_time": "0:28:08", "remaining_time": "0:00:05", "throughput": 2000.28, "total_tokens": 3377808}
1667
+ {"current_steps": 8240, "total_steps": 8260, "loss": 0.2167, "lr": 9.844652344492832e-10, "epoch": 9.975786924939467, "percentage": 99.76, "elapsed_time": "0:28:09", "remaining_time": "0:00:04", "throughput": 2000.39, "total_tokens": 3379888}
1668
+ {"current_steps": 8245, "total_steps": 8260, "loss": 0.216, "lr": 5.714825258545942e-10, "epoch": 9.9818401937046, "percentage": 99.82, "elapsed_time": "0:28:10", "remaining_time": "0:00:03", "throughput": 2000.58, "total_tokens": 3382064}
1669
+ {"current_steps": 8250, "total_steps": 8260, "loss": 0.1486, "lr": 2.7011533030585347e-10, "epoch": 9.987893462469733, "percentage": 99.88, "elapsed_time": "0:28:11", "remaining_time": "0:00:02", "throughput": 2000.72, "total_tokens": 3384144}
1670
+ {"current_steps": 8255, "total_steps": 8260, "loss": 0.1605, "lr": 8.036499332397807e-11, "epoch": 9.993946731234868, "percentage": 99.94, "elapsed_time": "0:28:12", "remaining_time": "0:00:01", "throughput": 2000.81, "total_tokens": 3386160}
1671
+ {"current_steps": 8260, "total_steps": 8260, "loss": 0.1835, "lr": 2.2323620896269604e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:28:13", "remaining_time": "0:00:00", "throughput": 2000.84, "total_tokens": 3388032}
1672
+ {"current_steps": 8260, "total_steps": 8260, "eval_loss": 0.1926523745059967, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:28:20", "remaining_time": "0:00:00", "throughput": 1991.82, "total_tokens": 3388032}
1673
+ {"current_steps": 8260, "total_steps": 8260, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:28:21", "remaining_time": "0:00:00", "throughput": 1990.89, "total_tokens": 3388032}