rbelanec commited on
Commit
deba640
·
verified ·
1 Parent(s): b2fac50

Training in progress, step 2400

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +41 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72f1176f7ae6aaea19c8541466038c819213a9d310410ff83f5d00a985db3be8
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3e072ec04a1a70a11ec71732c99e0a16db4480ef0c254702b64bcfd5f8f68e
3
  size 798032
trainer_log.jsonl CHANGED
@@ -449,3 +449,44 @@
449
  {"current_steps": 2195, "total_steps": 20000, "loss": 0.1669, "lr": 4.852999010000173e-05, "epoch": 0.3974739129450644, "percentage": 10.97, "elapsed_time": "2:32:42", "remaining_time": "20:38:40", "throughput": 1736.86, "total_tokens": 15913472}
450
  {"current_steps": 2200, "total_steps": 20000, "loss": 0.1823, "lr": 4.852334915436106e-05, "epoch": 0.3983793204916363, "percentage": 11.0, "elapsed_time": "2:32:52", "remaining_time": "20:36:55", "throughput": 1738.59, "total_tokens": 15947520}
451
  {"current_steps": 2200, "total_steps": 20000, "eval_loss": 0.17183205485343933, "epoch": 0.3983793204916363, "percentage": 11.0, "elapsed_time": "2:40:02", "remaining_time": "21:34:53", "throughput": 1660.75, "total_tokens": 15947520}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  {"current_steps": 2195, "total_steps": 20000, "loss": 0.1669, "lr": 4.852999010000173e-05, "epoch": 0.3974739129450644, "percentage": 10.97, "elapsed_time": "2:32:42", "remaining_time": "20:38:40", "throughput": 1736.86, "total_tokens": 15913472}
450
  {"current_steps": 2200, "total_steps": 20000, "loss": 0.1823, "lr": 4.852334915436106e-05, "epoch": 0.3983793204916363, "percentage": 11.0, "elapsed_time": "2:32:52", "remaining_time": "20:36:55", "throughput": 1738.59, "total_tokens": 15947520}
451
  {"current_steps": 2200, "total_steps": 20000, "eval_loss": 0.17183205485343933, "epoch": 0.3983793204916363, "percentage": 11.0, "elapsed_time": "2:40:02", "remaining_time": "21:34:53", "throughput": 1660.75, "total_tokens": 15947520}
452
+ {"current_steps": 2205, "total_steps": 20000, "loss": 0.1738, "lr": 4.851669369833673e-05, "epoch": 0.3992847280382082, "percentage": 11.03, "elapsed_time": "2:40:13", "remaining_time": "21:33:06", "throughput": 1662.42, "total_tokens": 15982336}
453
+ {"current_steps": 2210, "total_steps": 20000, "loss": 0.1411, "lr": 4.851002373603417e-05, "epoch": 0.4001901355847801, "percentage": 11.05, "elapsed_time": "2:40:24", "remaining_time": "21:31:16", "throughput": 1664.22, "total_tokens": 16017600}
454
+ {"current_steps": 2215, "total_steps": 20000, "loss": 0.1611, "lr": 4.850333927156775e-05, "epoch": 0.401095543131352, "percentage": 11.07, "elapsed_time": "2:40:35", "remaining_time": "21:29:29", "throughput": 1666.12, "total_tokens": 16054464}
455
+ {"current_steps": 2220, "total_steps": 20000, "loss": 0.1774, "lr": 4.849664030906077e-05, "epoch": 0.4020009506779239, "percentage": 11.1, "elapsed_time": "2:40:46", "remaining_time": "21:27:42", "throughput": 1667.93, "total_tokens": 16090432}
456
+ {"current_steps": 2225, "total_steps": 20000, "loss": 0.1608, "lr": 4.8489926852645505e-05, "epoch": 0.4029063582244958, "percentage": 11.12, "elapsed_time": "2:40:58", "remaining_time": "21:25:55", "throughput": 1669.81, "total_tokens": 16127104}
457
+ {"current_steps": 2230, "total_steps": 20000, "loss": 0.1574, "lr": 4.848319890646315e-05, "epoch": 0.4038117657710677, "percentage": 11.15, "elapsed_time": "2:41:09", "remaining_time": "21:24:09", "throughput": 1671.68, "total_tokens": 16163776}
458
+ {"current_steps": 2235, "total_steps": 20000, "loss": 0.1552, "lr": 4.847645647466382e-05, "epoch": 0.4047171733176396, "percentage": 11.18, "elapsed_time": "2:41:20", "remaining_time": "21:22:22", "throughput": 1673.48, "total_tokens": 16199296}
459
+ {"current_steps": 2240, "total_steps": 20000, "loss": 0.1506, "lr": 4.846969956140662e-05, "epoch": 0.4056225808642115, "percentage": 11.2, "elapsed_time": "2:41:30", "remaining_time": "21:20:34", "throughput": 1675.31, "total_tokens": 16235200}
460
+ {"current_steps": 2245, "total_steps": 20000, "loss": 0.1485, "lr": 4.8462928170859525e-05, "epoch": 0.4065279884107834, "percentage": 11.22, "elapsed_time": "2:41:41", "remaining_time": "21:18:47", "throughput": 1677.08, "total_tokens": 16270464}
461
+ {"current_steps": 2250, "total_steps": 20000, "loss": 0.1501, "lr": 4.845614230719947e-05, "epoch": 0.4074333959573553, "percentage": 11.25, "elapsed_time": "2:41:53", "remaining_time": "21:17:06", "throughput": 1679.02, "total_tokens": 16308736}
462
+ {"current_steps": 2255, "total_steps": 20000, "loss": 0.1709, "lr": 4.8449341974612334e-05, "epoch": 0.4083388035039272, "percentage": 11.28, "elapsed_time": "2:42:04", "remaining_time": "21:15:23", "throughput": 1680.85, "total_tokens": 16345280}
463
+ {"current_steps": 2260, "total_steps": 20000, "loss": 0.1555, "lr": 4.844252717729289e-05, "epoch": 0.40924421105049913, "percentage": 11.3, "elapsed_time": "2:42:14", "remaining_time": "21:13:33", "throughput": 1682.53, "total_tokens": 16379008}
464
+ {"current_steps": 2265, "total_steps": 20000, "loss": 0.1827, "lr": 4.843569791944486e-05, "epoch": 0.410149618597071, "percentage": 11.33, "elapsed_time": "2:42:25", "remaining_time": "21:11:51", "throughput": 1684.36, "total_tokens": 16415680}
465
+ {"current_steps": 2270, "total_steps": 20000, "loss": 0.1653, "lr": 4.842885420528085e-05, "epoch": 0.4110550261436429, "percentage": 11.35, "elapsed_time": "2:42:37", "remaining_time": "21:10:11", "throughput": 1686.23, "total_tokens": 16453376}
466
+ {"current_steps": 2275, "total_steps": 20000, "loss": 0.1831, "lr": 4.8421996039022436e-05, "epoch": 0.4119604336902148, "percentage": 11.38, "elapsed_time": "2:42:47", "remaining_time": "21:08:24", "throughput": 1687.93, "total_tokens": 16487552}
467
+ {"current_steps": 2280, "total_steps": 20000, "loss": 0.1941, "lr": 4.841512342490006e-05, "epoch": 0.4128658412367867, "percentage": 11.4, "elapsed_time": "2:42:59", "remaining_time": "21:06:41", "throughput": 1689.75, "total_tokens": 16524032}
468
+ {"current_steps": 2285, "total_steps": 20000, "loss": 0.1507, "lr": 4.840823636715309e-05, "epoch": 0.41377124878335864, "percentage": 11.43, "elapsed_time": "2:43:10", "remaining_time": "21:05:01", "throughput": 1691.55, "total_tokens": 16560704}
469
+ {"current_steps": 2290, "total_steps": 20000, "loss": 0.1668, "lr": 4.840133487002984e-05, "epoch": 0.4146766563299305, "percentage": 11.45, "elapsed_time": "2:43:21", "remaining_time": "21:03:19", "throughput": 1693.32, "total_tokens": 16596608}
470
+ {"current_steps": 2295, "total_steps": 20000, "loss": 0.175, "lr": 4.839441893778747e-05, "epoch": 0.4155820638765024, "percentage": 11.47, "elapsed_time": "2:43:32", "remaining_time": "21:01:36", "throughput": 1695.06, "total_tokens": 16632256}
471
+ {"current_steps": 2300, "total_steps": 20000, "loss": 0.1918, "lr": 4.838748857469208e-05, "epoch": 0.4164874714230743, "percentage": 11.5, "elapsed_time": "2:43:43", "remaining_time": "20:59:57", "throughput": 1696.9, "total_tokens": 16669312}
472
+ {"current_steps": 2305, "total_steps": 20000, "loss": 0.1846, "lr": 4.8380543785018677e-05, "epoch": 0.4173928789696462, "percentage": 11.53, "elapsed_time": "2:43:54", "remaining_time": "20:58:18", "throughput": 1698.72, "total_tokens": 16706240}
473
+ {"current_steps": 2310, "total_steps": 20000, "loss": 0.1573, "lr": 4.837358457305116e-05, "epoch": 0.4182982865162181, "percentage": 11.55, "elapsed_time": "2:44:06", "remaining_time": "20:56:41", "throughput": 1700.56, "total_tokens": 16743936}
474
+ {"current_steps": 2315, "total_steps": 20000, "loss": 0.205, "lr": 4.836661094308229e-05, "epoch": 0.41920369406279, "percentage": 11.58, "elapsed_time": "2:44:17", "remaining_time": "20:55:06", "throughput": 1702.48, "total_tokens": 16782784}
475
+ {"current_steps": 2320, "total_steps": 20000, "loss": 0.1937, "lr": 4.835962289941379e-05, "epoch": 0.42010910160936193, "percentage": 11.6, "elapsed_time": "2:44:29", "remaining_time": "20:53:31", "throughput": 1704.39, "total_tokens": 16821312}
476
+ {"current_steps": 2325, "total_steps": 20000, "loss": 0.1755, "lr": 4.835262044635621e-05, "epoch": 0.4210145091559338, "percentage": 11.62, "elapsed_time": "2:44:40", "remaining_time": "20:51:50", "throughput": 1706.11, "total_tokens": 16856640}
477
+ {"current_steps": 2330, "total_steps": 20000, "loss": 0.1576, "lr": 4.834560358822903e-05, "epoch": 0.4219199167025057, "percentage": 11.65, "elapsed_time": "2:44:51", "remaining_time": "20:50:10", "throughput": 1707.83, "total_tokens": 16892160}
478
+ {"current_steps": 2335, "total_steps": 20000, "loss": 0.1566, "lr": 4.83385723293606e-05, "epoch": 0.4228253242490776, "percentage": 11.68, "elapsed_time": "2:45:02", "remaining_time": "20:48:34", "throughput": 1709.59, "total_tokens": 16929024}
479
+ {"current_steps": 2340, "total_steps": 20000, "loss": 0.1877, "lr": 4.833152667408814e-05, "epoch": 0.4237307317956495, "percentage": 11.7, "elapsed_time": "2:45:13", "remaining_time": "20:46:59", "throughput": 1711.43, "total_tokens": 16966912}
480
+ {"current_steps": 2345, "total_steps": 20000, "loss": 0.1511, "lr": 4.8324466626757775e-05, "epoch": 0.42463613934222144, "percentage": 11.72, "elapsed_time": "2:45:25", "remaining_time": "20:45:24", "throughput": 1713.26, "total_tokens": 17004352}
481
+ {"current_steps": 2350, "total_steps": 20000, "loss": 0.1919, "lr": 4.8317392191724495e-05, "epoch": 0.4255415468887933, "percentage": 11.75, "elapsed_time": "2:45:36", "remaining_time": "20:43:47", "throughput": 1715.0, "total_tokens": 17040704}
482
+ {"current_steps": 2355, "total_steps": 20000, "loss": 0.1616, "lr": 4.831030337335217e-05, "epoch": 0.42644695443536523, "percentage": 11.77, "elapsed_time": "2:45:46", "remaining_time": "20:42:08", "throughput": 1716.67, "total_tokens": 17075584}
483
+ {"current_steps": 2360, "total_steps": 20000, "loss": 0.1679, "lr": 4.8303200176013537e-05, "epoch": 0.4273523619819371, "percentage": 11.8, "elapsed_time": "2:45:57", "remaining_time": "20:40:26", "throughput": 1718.27, "total_tokens": 17109248}
484
+ {"current_steps": 2365, "total_steps": 20000, "loss": 0.173, "lr": 4.82960826040902e-05, "epoch": 0.428257769528509, "percentage": 11.82, "elapsed_time": "2:46:08", "remaining_time": "20:38:48", "throughput": 1719.92, "total_tokens": 17144256}
485
+ {"current_steps": 2370, "total_steps": 20000, "loss": 0.1578, "lr": 4.828895066197264e-05, "epoch": 0.42916317707508095, "percentage": 11.85, "elapsed_time": "2:46:19", "remaining_time": "20:37:13", "throughput": 1721.71, "total_tokens": 17181376}
486
+ {"current_steps": 2375, "total_steps": 20000, "loss": 0.1614, "lr": 4.82818043540602e-05, "epoch": 0.4300685846216528, "percentage": 11.88, "elapsed_time": "2:46:30", "remaining_time": "20:35:36", "throughput": 1723.42, "total_tokens": 17217152}
487
+ {"current_steps": 2380, "total_steps": 20000, "loss": 0.1802, "lr": 4.827464368476108e-05, "epoch": 0.43097399216822474, "percentage": 11.9, "elapsed_time": "2:46:40", "remaining_time": "20:33:58", "throughput": 1725.02, "total_tokens": 17251328}
488
+ {"current_steps": 2385, "total_steps": 20000, "loss": 0.173, "lr": 4.8267468658492335e-05, "epoch": 0.4318793997147966, "percentage": 11.92, "elapsed_time": "2:46:52", "remaining_time": "20:32:28", "throughput": 1726.88, "total_tokens": 17289984}
489
+ {"current_steps": 2390, "total_steps": 20000, "loss": 0.1663, "lr": 4.826027927967988e-05, "epoch": 0.43278480726136853, "percentage": 11.95, "elapsed_time": "2:47:02", "remaining_time": "20:30:50", "throughput": 1728.54, "total_tokens": 17324800}
490
+ {"current_steps": 2395, "total_steps": 20000, "loss": 0.1525, "lr": 4.82530755527585e-05, "epoch": 0.4336902148079404, "percentage": 11.97, "elapsed_time": "2:47:13", "remaining_time": "20:29:13", "throughput": 1730.12, "total_tokens": 17359040}
491
+ {"current_steps": 2400, "total_steps": 20000, "loss": 0.1666, "lr": 4.8245857482171805e-05, "epoch": 0.4345956223545123, "percentage": 12.0, "elapsed_time": "2:47:24", "remaining_time": "20:27:40", "throughput": 1731.86, "total_tokens": 17395776}
492
+ {"current_steps": 2400, "total_steps": 20000, "eval_loss": 0.16846157610416412, "epoch": 0.4345956223545123, "percentage": 12.0, "elapsed_time": "2:54:34", "remaining_time": "21:20:12", "throughput": 1660.78, "total_tokens": 17395776}