Training in progress, step 26712
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +382 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaaadafbd8f1cc487675b12a192993af05cc5fe05aad9de63474a18d9a81fa6d
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -4975,3 +4975,385 @@
|
|
| 4975 |
{"current_steps": 24810, "total_steps": 38160, "loss": 0.524, "lr": 1.64387525983924e-05, "epoch": 13.00314465408805, "percentage": 65.02, "elapsed_time": "1:15:02", "remaining_time": "0:40:22", "throughput": 3594.41, "total_tokens": 16183176}
|
| 4976 |
{"current_steps": 24815, "total_steps": 38160, "loss": 0.4489, "lr": 1.6428010570841788e-05, "epoch": 13.005765199161425, "percentage": 65.03, "elapsed_time": "1:15:03", "remaining_time": "0:40:21", "throughput": 3594.28, "total_tokens": 16185928}
|
| 4977 |
{"current_steps": 24820, "total_steps": 38160, "loss": 0.5525, "lr": 1.6417270336453573e-05, "epoch": 13.0083857442348, "percentage": 65.04, "elapsed_time": "1:15:04", "remaining_time": "0:40:20", "throughput": 3594.25, "total_tokens": 16189160}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4975 |
{"current_steps": 24810, "total_steps": 38160, "loss": 0.524, "lr": 1.64387525983924e-05, "epoch": 13.00314465408805, "percentage": 65.02, "elapsed_time": "1:15:02", "remaining_time": "0:40:22", "throughput": 3594.41, "total_tokens": 16183176}
|
| 4976 |
{"current_steps": 24815, "total_steps": 38160, "loss": 0.4489, "lr": 1.6428010570841788e-05, "epoch": 13.005765199161425, "percentage": 65.03, "elapsed_time": "1:15:03", "remaining_time": "0:40:21", "throughput": 3594.28, "total_tokens": 16185928}
|
| 4977 |
{"current_steps": 24820, "total_steps": 38160, "loss": 0.5525, "lr": 1.6417270336453573e-05, "epoch": 13.0083857442348, "percentage": 65.04, "elapsed_time": "1:15:04", "remaining_time": "0:40:20", "throughput": 3594.25, "total_tokens": 16189160}
|
| 4978 |
+
{"current_steps": 24825, "total_steps": 38160, "loss": 0.332, "lr": 1.6406531897474492e-05, "epoch": 13.011006289308176, "percentage": 65.06, "elapsed_time": "1:15:05", "remaining_time": "0:40:19", "throughput": 3594.23, "total_tokens": 16192392}
|
| 4979 |
+
{"current_steps": 24830, "total_steps": 38160, "loss": 0.509, "lr": 1.639579525615089e-05, "epoch": 13.01362683438155, "percentage": 65.07, "elapsed_time": "1:15:05", "remaining_time": "0:40:19", "throughput": 3594.3, "total_tokens": 16195752}
|
| 4980 |
+
{"current_steps": 24835, "total_steps": 38160, "loss": 0.4231, "lr": 1.638506041472877e-05, "epoch": 13.016247379454926, "percentage": 65.08, "elapsed_time": "1:15:06", "remaining_time": "0:40:18", "throughput": 3594.41, "total_tokens": 16199176}
|
| 4981 |
+
{"current_steps": 24840, "total_steps": 38160, "loss": 0.6066, "lr": 1.6374327375453724e-05, "epoch": 13.018867924528301, "percentage": 65.09, "elapsed_time": "1:15:07", "remaining_time": "0:40:17", "throughput": 3594.33, "total_tokens": 16201768}
|
| 4982 |
+
{"current_steps": 24845, "total_steps": 38160, "loss": 0.4512, "lr": 1.6363596140570982e-05, "epoch": 13.021488469601676, "percentage": 65.11, "elapsed_time": "1:15:08", "remaining_time": "0:40:16", "throughput": 3594.25, "total_tokens": 16204680}
|
| 4983 |
+
{"current_steps": 24850, "total_steps": 38160, "loss": 0.4848, "lr": 1.6352866712325395e-05, "epoch": 13.024109014675052, "percentage": 65.12, "elapsed_time": "1:15:09", "remaining_time": "0:40:15", "throughput": 3594.29, "total_tokens": 16208264}
|
| 4984 |
+
{"current_steps": 24855, "total_steps": 38160, "loss": 0.4144, "lr": 1.634213909296144e-05, "epoch": 13.026729559748428, "percentage": 65.13, "elapsed_time": "1:15:10", "remaining_time": "0:40:14", "throughput": 3594.15, "total_tokens": 16210536}
|
| 4985 |
+
{"current_steps": 24860, "total_steps": 38160, "loss": 0.6053, "lr": 1.6331413284723217e-05, "epoch": 13.029350104821804, "percentage": 65.15, "elapsed_time": "1:15:11", "remaining_time": "0:40:13", "throughput": 3594.3, "total_tokens": 16214376}
|
| 4986 |
+
{"current_steps": 24865, "total_steps": 38160, "loss": 0.6, "lr": 1.6320689289854442e-05, "epoch": 13.031970649895179, "percentage": 65.16, "elapsed_time": "1:15:12", "remaining_time": "0:40:12", "throughput": 3594.28, "total_tokens": 16217416}
|
| 4987 |
+
{"current_steps": 24870, "total_steps": 38160, "loss": 0.527, "lr": 1.6309967110598438e-05, "epoch": 13.034591194968554, "percentage": 65.17, "elapsed_time": "1:15:12", "remaining_time": "0:40:11", "throughput": 3594.31, "total_tokens": 16220584}
|
| 4988 |
+
{"current_steps": 24875, "total_steps": 38160, "loss": 0.3826, "lr": 1.6299246749198183e-05, "epoch": 13.03721174004193, "percentage": 65.19, "elapsed_time": "1:15:13", "remaining_time": "0:40:10", "throughput": 3594.37, "total_tokens": 16224040}
|
| 4989 |
+
{"current_steps": 24880, "total_steps": 38160, "loss": 0.4878, "lr": 1.6288528207896236e-05, "epoch": 13.039832285115304, "percentage": 65.2, "elapsed_time": "1:15:14", "remaining_time": "0:40:09", "throughput": 3594.32, "total_tokens": 16227016}
|
| 4990 |
+
{"current_steps": 24885, "total_steps": 38160, "loss": 0.546, "lr": 1.627781148893481e-05, "epoch": 13.04245283018868, "percentage": 65.21, "elapsed_time": "1:15:15", "remaining_time": "0:40:08", "throughput": 3594.21, "total_tokens": 16229768}
|
| 4991 |
+
{"current_steps": 24890, "total_steps": 38160, "loss": 0.4111, "lr": 1.6267096594555703e-05, "epoch": 13.045073375262055, "percentage": 65.23, "elapsed_time": "1:15:16", "remaining_time": "0:40:07", "throughput": 3594.29, "total_tokens": 16233224}
|
| 4992 |
+
{"current_steps": 24895, "total_steps": 38160, "loss": 0.3778, "lr": 1.6256383527000372e-05, "epoch": 13.04769392033543, "percentage": 65.24, "elapsed_time": "1:15:17", "remaining_time": "0:40:06", "throughput": 3594.32, "total_tokens": 16236264}
|
| 4993 |
+
{"current_steps": 24900, "total_steps": 38160, "loss": 0.4303, "lr": 1.6245672288509843e-05, "epoch": 13.050314465408805, "percentage": 65.25, "elapsed_time": "1:15:18", "remaining_time": "0:40:06", "throughput": 3594.2, "total_tokens": 16238984}
|
| 4994 |
+
{"current_steps": 24905, "total_steps": 38160, "loss": 0.4066, "lr": 1.62349628813248e-05, "epoch": 13.05293501048218, "percentage": 65.26, "elapsed_time": "1:15:18", "remaining_time": "0:40:05", "throughput": 3594.1, "total_tokens": 16241608}
|
| 4995 |
+
{"current_steps": 24910, "total_steps": 38160, "loss": 0.5035, "lr": 1.6224255307685525e-05, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "1:15:19", "remaining_time": "0:40:04", "throughput": 3594.01, "total_tokens": 16244328}
|
| 4996 |
+
{"current_steps": 24915, "total_steps": 38160, "loss": 0.5151, "lr": 1.6213549569831922e-05, "epoch": 13.05817610062893, "percentage": 65.29, "elapsed_time": "1:15:20", "remaining_time": "0:40:03", "throughput": 3594.06, "total_tokens": 16247912}
|
| 4997 |
+
{"current_steps": 24920, "total_steps": 38160, "loss": 0.5611, "lr": 1.62028456700035e-05, "epoch": 13.060796645702306, "percentage": 65.3, "elapsed_time": "1:15:21", "remaining_time": "0:40:02", "throughput": 3594.1, "total_tokens": 16250920}
|
| 4998 |
+
{"current_steps": 24925, "total_steps": 38160, "loss": 0.5155, "lr": 1.619214361043942e-05, "epoch": 13.06341719077568, "percentage": 65.32, "elapsed_time": "1:15:22", "remaining_time": "0:40:01", "throughput": 3594.23, "total_tokens": 16254728}
|
| 4999 |
+
{"current_steps": 24930, "total_steps": 38160, "loss": 0.3945, "lr": 1.61814433933784e-05, "epoch": 13.066037735849056, "percentage": 65.33, "elapsed_time": "1:15:23", "remaining_time": "0:40:00", "throughput": 3594.23, "total_tokens": 16257640}
|
| 5000 |
+
{"current_steps": 24935, "total_steps": 38160, "loss": 0.5654, "lr": 1.617074502105881e-05, "epoch": 13.068658280922431, "percentage": 65.34, "elapsed_time": "1:15:24", "remaining_time": "0:39:59", "throughput": 3594.29, "total_tokens": 16260808}
|
| 5001 |
+
{"current_steps": 24940, "total_steps": 38160, "loss": 0.4256, "lr": 1.616004849571864e-05, "epoch": 13.071278825995806, "percentage": 65.36, "elapsed_time": "1:15:24", "remaining_time": "0:39:58", "throughput": 3594.37, "total_tokens": 16264264}
|
| 5002 |
+
{"current_steps": 24945, "total_steps": 38160, "loss": 0.4506, "lr": 1.6149353819595463e-05, "epoch": 13.073899371069182, "percentage": 65.37, "elapsed_time": "1:15:25", "remaining_time": "0:39:57", "throughput": 3594.4, "total_tokens": 16267528}
|
| 5003 |
+
{"current_steps": 24950, "total_steps": 38160, "loss": 0.6392, "lr": 1.6138660994926508e-05, "epoch": 13.076519916142558, "percentage": 65.38, "elapsed_time": "1:15:26", "remaining_time": "0:39:56", "throughput": 3594.34, "total_tokens": 16270088}
|
| 5004 |
+
{"current_steps": 24955, "total_steps": 38160, "loss": 0.5764, "lr": 1.612797002394857e-05, "epoch": 13.079140461215934, "percentage": 65.4, "elapsed_time": "1:15:27", "remaining_time": "0:39:55", "throughput": 3594.37, "total_tokens": 16273320}
|
| 5005 |
+
{"current_steps": 24960, "total_steps": 38160, "loss": 0.5048, "lr": 1.611728090889808e-05, "epoch": 13.081761006289309, "percentage": 65.41, "elapsed_time": "1:15:28", "remaining_time": "0:39:54", "throughput": 3594.36, "total_tokens": 16276232}
|
| 5006 |
+
{"current_steps": 24965, "total_steps": 38160, "loss": 0.5542, "lr": 1.610659365201109e-05, "epoch": 13.084381551362684, "percentage": 65.42, "elapsed_time": "1:15:29", "remaining_time": "0:39:53", "throughput": 3594.35, "total_tokens": 16279272}
|
| 5007 |
+
{"current_steps": 24970, "total_steps": 38160, "loss": 0.4745, "lr": 1.609590825552324e-05, "epoch": 13.08700209643606, "percentage": 65.44, "elapsed_time": "1:15:29", "remaining_time": "0:39:52", "throughput": 3594.33, "total_tokens": 16282024}
|
| 5008 |
+
{"current_steps": 24975, "total_steps": 38160, "loss": 0.4919, "lr": 1.6085224721669807e-05, "epoch": 13.089622641509434, "percentage": 65.45, "elapsed_time": "1:15:30", "remaining_time": "0:39:51", "throughput": 3594.3, "total_tokens": 16284808}
|
| 5009 |
+
{"current_steps": 24980, "total_steps": 38160, "loss": 0.6866, "lr": 1.607454305268566e-05, "epoch": 13.09224318658281, "percentage": 65.46, "elapsed_time": "1:15:31", "remaining_time": "0:39:50", "throughput": 3594.36, "total_tokens": 16288200}
|
| 5010 |
+
{"current_steps": 24985, "total_steps": 38160, "loss": 0.4677, "lr": 1.606386325080528e-05, "epoch": 13.094863731656185, "percentage": 65.47, "elapsed_time": "1:15:32", "remaining_time": "0:39:50", "throughput": 3594.34, "total_tokens": 16291176}
|
| 5011 |
+
{"current_steps": 24990, "total_steps": 38160, "loss": 0.539, "lr": 1.6053185318262746e-05, "epoch": 13.09748427672956, "percentage": 65.49, "elapsed_time": "1:15:33", "remaining_time": "0:39:49", "throughput": 3594.26, "total_tokens": 16293896}
|
| 5012 |
+
{"current_steps": 24995, "total_steps": 38160, "loss": 0.4161, "lr": 1.6042509257291783e-05, "epoch": 13.100104821802935, "percentage": 65.5, "elapsed_time": "1:15:34", "remaining_time": "0:39:48", "throughput": 3594.29, "total_tokens": 16297192}
|
| 5013 |
+
{"current_steps": 25000, "total_steps": 38160, "loss": 0.5575, "lr": 1.6031835070125685e-05, "epoch": 13.10272536687631, "percentage": 65.51, "elapsed_time": "1:15:35", "remaining_time": "0:39:47", "throughput": 3594.35, "total_tokens": 16300680}
|
| 5014 |
+
{"current_steps": 25005, "total_steps": 38160, "loss": 0.2918, "lr": 1.6021162758997382e-05, "epoch": 13.105345911949685, "percentage": 65.53, "elapsed_time": "1:15:35", "remaining_time": "0:39:46", "throughput": 3594.24, "total_tokens": 16303080}
|
| 5015 |
+
{"current_steps": 25010, "total_steps": 38160, "loss": 0.5577, "lr": 1.6010492326139398e-05, "epoch": 13.10796645702306, "percentage": 65.54, "elapsed_time": "1:15:36", "remaining_time": "0:39:45", "throughput": 3593.98, "total_tokens": 16305224}
|
| 5016 |
+
{"current_steps": 25015, "total_steps": 38160, "loss": 0.6102, "lr": 1.599982377378385e-05, "epoch": 13.110587002096436, "percentage": 65.55, "elapsed_time": "1:15:37", "remaining_time": "0:39:44", "throughput": 3593.99, "total_tokens": 16308168}
|
| 5017 |
+
{"current_steps": 25020, "total_steps": 38160, "loss": 0.5064, "lr": 1.5989157104162496e-05, "epoch": 13.11320754716981, "percentage": 65.57, "elapsed_time": "1:15:38", "remaining_time": "0:39:43", "throughput": 3594.12, "total_tokens": 16312072}
|
| 5018 |
+
{"current_steps": 25025, "total_steps": 38160, "loss": 0.5439, "lr": 1.5978492319506673e-05, "epoch": 13.115828092243186, "percentage": 65.58, "elapsed_time": "1:15:39", "remaining_time": "0:39:42", "throughput": 3594.1, "total_tokens": 16315112}
|
| 5019 |
+
{"current_steps": 25030, "total_steps": 38160, "loss": 0.6363, "lr": 1.596782942204733e-05, "epoch": 13.118448637316561, "percentage": 65.59, "elapsed_time": "1:15:40", "remaining_time": "0:39:41", "throughput": 3593.96, "total_tokens": 16317704}
|
| 5020 |
+
{"current_steps": 25035, "total_steps": 38160, "loss": 0.4796, "lr": 1.595716841401504e-05, "epoch": 13.121069182389936, "percentage": 65.61, "elapsed_time": "1:15:41", "remaining_time": "0:39:40", "throughput": 3594.07, "total_tokens": 16321288}
|
| 5021 |
+
{"current_steps": 25040, "total_steps": 38160, "loss": 0.3675, "lr": 1.594650929763994e-05, "epoch": 13.123689727463312, "percentage": 65.62, "elapsed_time": "1:15:41", "remaining_time": "0:39:39", "throughput": 3594.1, "total_tokens": 16324360}
|
| 5022 |
+
{"current_steps": 25045, "total_steps": 38160, "loss": 0.433, "lr": 1.5935852075151812e-05, "epoch": 13.126310272536688, "percentage": 65.63, "elapsed_time": "1:15:42", "remaining_time": "0:39:38", "throughput": 3594.17, "total_tokens": 16327784}
|
| 5023 |
+
{"current_steps": 25050, "total_steps": 38160, "loss": 0.6218, "lr": 1.5925196748780016e-05, "epoch": 13.128930817610064, "percentage": 65.64, "elapsed_time": "1:15:43", "remaining_time": "0:39:38", "throughput": 3594.25, "total_tokens": 16331848}
|
| 5024 |
+
{"current_steps": 25055, "total_steps": 38160, "loss": 0.5332, "lr": 1.5914543320753528e-05, "epoch": 13.131551362683439, "percentage": 65.66, "elapsed_time": "1:15:44", "remaining_time": "0:39:37", "throughput": 3594.41, "total_tokens": 16335592}
|
| 5025 |
+
{"current_steps": 25060, "total_steps": 38160, "loss": 0.599, "lr": 1.5903891793300932e-05, "epoch": 13.134171907756814, "percentage": 65.67, "elapsed_time": "1:15:45", "remaining_time": "0:39:36", "throughput": 3594.65, "total_tokens": 16340200}
|
| 5026 |
+
{"current_steps": 25065, "total_steps": 38160, "loss": 0.5655, "lr": 1.5893242168650403e-05, "epoch": 13.13679245283019, "percentage": 65.68, "elapsed_time": "1:15:46", "remaining_time": "0:39:35", "throughput": 3594.66, "total_tokens": 16343208}
|
| 5027 |
+
{"current_steps": 25070, "total_steps": 38160, "loss": 0.3991, "lr": 1.5882594449029705e-05, "epoch": 13.139412997903564, "percentage": 65.7, "elapsed_time": "1:15:47", "remaining_time": "0:39:34", "throughput": 3594.65, "total_tokens": 16346088}
|
| 5028 |
+
{"current_steps": 25075, "total_steps": 38160, "loss": 0.5077, "lr": 1.587194863666624e-05, "epoch": 13.14203354297694, "percentage": 65.71, "elapsed_time": "1:15:48", "remaining_time": "0:39:33", "throughput": 3594.69, "total_tokens": 16349768}
|
| 5029 |
+
{"current_steps": 25080, "total_steps": 38160, "loss": 0.5017, "lr": 1.5861304733786974e-05, "epoch": 13.144654088050315, "percentage": 65.72, "elapsed_time": "1:15:49", "remaining_time": "0:39:32", "throughput": 3594.84, "total_tokens": 16353960}
|
| 5030 |
+
{"current_steps": 25085, "total_steps": 38160, "loss": 0.6488, "lr": 1.5850662742618505e-05, "epoch": 13.14727463312369, "percentage": 65.74, "elapsed_time": "1:15:50", "remaining_time": "0:39:31", "throughput": 3594.86, "total_tokens": 16356904}
|
| 5031 |
+
{"current_steps": 25090, "total_steps": 38160, "loss": 0.4185, "lr": 1.5840022665387004e-05, "epoch": 13.149895178197065, "percentage": 65.75, "elapsed_time": "1:15:50", "remaining_time": "0:39:30", "throughput": 3594.86, "total_tokens": 16360040}
|
| 5032 |
+
{"current_steps": 25095, "total_steps": 38160, "loss": 0.5492, "lr": 1.5829384504318268e-05, "epoch": 13.15251572327044, "percentage": 65.76, "elapsed_time": "1:15:51", "remaining_time": "0:39:29", "throughput": 3594.9, "total_tokens": 16363496}
|
| 5033 |
+
{"current_steps": 25100, "total_steps": 38160, "loss": 0.4938, "lr": 1.5818748261637662e-05, "epoch": 13.155136268343815, "percentage": 65.78, "elapsed_time": "1:15:52", "remaining_time": "0:39:28", "throughput": 3594.86, "total_tokens": 16366376}
|
| 5034 |
+
{"current_steps": 25105, "total_steps": 38160, "loss": 0.5805, "lr": 1.5808113939570173e-05, "epoch": 13.15775681341719, "percentage": 65.79, "elapsed_time": "1:15:53", "remaining_time": "0:39:27", "throughput": 3594.95, "total_tokens": 16370184}
|
| 5035 |
+
{"current_steps": 25110, "total_steps": 38160, "loss": 0.4394, "lr": 1.5797481540340386e-05, "epoch": 13.160377358490566, "percentage": 65.8, "elapsed_time": "1:15:54", "remaining_time": "0:39:27", "throughput": 3595.05, "total_tokens": 16374152}
|
| 5036 |
+
{"current_steps": 25115, "total_steps": 38160, "loss": 0.4757, "lr": 1.5786851066172466e-05, "epoch": 13.16299790356394, "percentage": 65.81, "elapsed_time": "1:15:55", "remaining_time": "0:39:26", "throughput": 3595.03, "total_tokens": 16376904}
|
| 5037 |
+
{"current_steps": 25120, "total_steps": 38160, "loss": 0.4189, "lr": 1.5776222519290208e-05, "epoch": 13.165618448637316, "percentage": 65.83, "elapsed_time": "1:15:56", "remaining_time": "0:39:25", "throughput": 3595.02, "total_tokens": 16379784}
|
| 5038 |
+
{"current_steps": 25125, "total_steps": 38160, "loss": 0.4817, "lr": 1.576559590191696e-05, "epoch": 13.168238993710691, "percentage": 65.84, "elapsed_time": "1:15:57", "remaining_time": "0:39:24", "throughput": 3595.04, "total_tokens": 16382888}
|
| 5039 |
+
{"current_steps": 25130, "total_steps": 38160, "loss": 0.4129, "lr": 1.5754971216275692e-05, "epoch": 13.170859538784066, "percentage": 65.85, "elapsed_time": "1:15:57", "remaining_time": "0:39:23", "throughput": 3595.12, "total_tokens": 16386472}
|
| 5040 |
+
{"current_steps": 25135, "total_steps": 38160, "loss": 0.6133, "lr": 1.5744348464588975e-05, "epoch": 13.173480083857442, "percentage": 65.87, "elapsed_time": "1:15:58", "remaining_time": "0:39:22", "throughput": 3595.17, "total_tokens": 16389608}
|
| 5041 |
+
{"current_steps": 25140, "total_steps": 38160, "loss": 0.4026, "lr": 1.5733727649078968e-05, "epoch": 13.176100628930818, "percentage": 65.88, "elapsed_time": "1:15:59", "remaining_time": "0:39:21", "throughput": 3595.26, "total_tokens": 16393032}
|
| 5042 |
+
{"current_steps": 25145, "total_steps": 38160, "loss": 0.3953, "lr": 1.5723108771967414e-05, "epoch": 13.178721174004194, "percentage": 65.89, "elapsed_time": "1:16:00", "remaining_time": "0:39:20", "throughput": 3595.35, "total_tokens": 16396712}
|
| 5043 |
+
{"current_steps": 25150, "total_steps": 38160, "loss": 0.5422, "lr": 1.5712491835475685e-05, "epoch": 13.181341719077569, "percentage": 65.91, "elapsed_time": "1:16:01", "remaining_time": "0:39:19", "throughput": 3595.28, "total_tokens": 16399272}
|
| 5044 |
+
{"current_steps": 25155, "total_steps": 38160, "loss": 0.4118, "lr": 1.5701876841824698e-05, "epoch": 13.183962264150944, "percentage": 65.92, "elapsed_time": "1:16:02", "remaining_time": "0:39:18", "throughput": 3595.27, "total_tokens": 16402088}
|
| 5045 |
+
{"current_steps": 25160, "total_steps": 38160, "loss": 0.6537, "lr": 1.5691263793234993e-05, "epoch": 13.18658280922432, "percentage": 65.93, "elapsed_time": "1:16:03", "remaining_time": "0:39:17", "throughput": 3595.43, "total_tokens": 16406600}
|
| 5046 |
+
{"current_steps": 25165, "total_steps": 38160, "loss": 0.426, "lr": 1.5680652691926706e-05, "epoch": 13.189203354297694, "percentage": 65.95, "elapsed_time": "1:16:03", "remaining_time": "0:39:16", "throughput": 3595.38, "total_tokens": 16409224}
|
| 5047 |
+
{"current_steps": 25170, "total_steps": 38160, "loss": 0.783, "lr": 1.5670043540119553e-05, "epoch": 13.19182389937107, "percentage": 65.96, "elapsed_time": "1:16:04", "remaining_time": "0:39:15", "throughput": 3595.37, "total_tokens": 16412136}
|
| 5048 |
+
{"current_steps": 25175, "total_steps": 38160, "loss": 0.4659, "lr": 1.5659436340032855e-05, "epoch": 13.194444444444445, "percentage": 65.97, "elapsed_time": "1:16:05", "remaining_time": "0:39:14", "throughput": 3595.54, "total_tokens": 16416392}
|
| 5049 |
+
{"current_steps": 25180, "total_steps": 38160, "loss": 0.5295, "lr": 1.5648831093885515e-05, "epoch": 13.19706498951782, "percentage": 65.99, "elapsed_time": "1:16:06", "remaining_time": "0:39:14", "throughput": 3595.57, "total_tokens": 16419688}
|
| 5050 |
+
{"current_steps": 25185, "total_steps": 38160, "loss": 0.5109, "lr": 1.5638227803896015e-05, "epoch": 13.199685534591195, "percentage": 66.0, "elapsed_time": "1:16:07", "remaining_time": "0:39:13", "throughput": 3595.63, "total_tokens": 16422856}
|
| 5051 |
+
{"current_steps": 25190, "total_steps": 38160, "loss": 0.5308, "lr": 1.562762647228246e-05, "epoch": 13.20230607966457, "percentage": 66.01, "elapsed_time": "1:16:08", "remaining_time": "0:39:12", "throughput": 3595.6, "total_tokens": 16425608}
|
| 5052 |
+
{"current_steps": 25195, "total_steps": 38160, "loss": 0.4565, "lr": 1.5617027101262515e-05, "epoch": 13.204926624737945, "percentage": 66.02, "elapsed_time": "1:16:09", "remaining_time": "0:39:11", "throughput": 3595.72, "total_tokens": 16429288}
|
| 5053 |
+
{"current_steps": 25200, "total_steps": 38160, "loss": 0.5388, "lr": 1.5606429693053463e-05, "epoch": 13.20754716981132, "percentage": 66.04, "elapsed_time": "1:16:09", "remaining_time": "0:39:10", "throughput": 3595.79, "total_tokens": 16432744}
|
| 5054 |
+
{"current_steps": 25205, "total_steps": 38160, "loss": 0.3611, "lr": 1.5595834249872145e-05, "epoch": 13.210167714884696, "percentage": 66.05, "elapsed_time": "1:16:10", "remaining_time": "0:39:09", "throughput": 3595.81, "total_tokens": 16435944}
|
| 5055 |
+
{"current_steps": 25210, "total_steps": 38160, "loss": 0.4005, "lr": 1.558524077393502e-05, "epoch": 13.21278825995807, "percentage": 66.06, "elapsed_time": "1:16:11", "remaining_time": "0:39:08", "throughput": 3595.73, "total_tokens": 16438760}
|
| 5056 |
+
{"current_steps": 25215, "total_steps": 38160, "loss": 0.4146, "lr": 1.557464926745811e-05, "epoch": 13.215408805031446, "percentage": 66.08, "elapsed_time": "1:16:12", "remaining_time": "0:39:07", "throughput": 3595.8, "total_tokens": 16442280}
|
| 5057 |
+
{"current_steps": 25220, "total_steps": 38160, "loss": 0.4046, "lr": 1.5564059732657045e-05, "epoch": 13.218029350104821, "percentage": 66.09, "elapsed_time": "1:16:13", "remaining_time": "0:39:06", "throughput": 3595.76, "total_tokens": 16445384}
|
| 5058 |
+
{"current_steps": 25225, "total_steps": 38160, "loss": 0.3182, "lr": 1.555347217174703e-05, "epoch": 13.220649895178196, "percentage": 66.1, "elapsed_time": "1:16:14", "remaining_time": "0:39:05", "throughput": 3595.68, "total_tokens": 16448040}
|
| 5059 |
+
{"current_steps": 25230, "total_steps": 38160, "loss": 0.4337, "lr": 1.554288658694287e-05, "epoch": 13.223270440251572, "percentage": 66.12, "elapsed_time": "1:16:15", "remaining_time": "0:39:04", "throughput": 3595.72, "total_tokens": 16451432}
|
| 5060 |
+
{"current_steps": 25235, "total_steps": 38160, "loss": 0.3906, "lr": 1.553230298045895e-05, "epoch": 13.225890985324948, "percentage": 66.13, "elapsed_time": "1:16:16", "remaining_time": "0:39:03", "throughput": 3595.94, "total_tokens": 16455624}
|
| 5061 |
+
{"current_steps": 25240, "total_steps": 38160, "loss": 0.3693, "lr": 1.5521721354509223e-05, "epoch": 13.228511530398324, "percentage": 66.14, "elapsed_time": "1:16:17", "remaining_time": "0:39:02", "throughput": 3595.96, "total_tokens": 16459112}
|
| 5062 |
+
{"current_steps": 25245, "total_steps": 38160, "loss": 0.4626, "lr": 1.5511141711307258e-05, "epoch": 13.231132075471699, "percentage": 66.16, "elapsed_time": "1:16:17", "remaining_time": "0:39:02", "throughput": 3595.98, "total_tokens": 16462312}
|
| 5063 |
+
{"current_steps": 25250, "total_steps": 38160, "loss": 0.6713, "lr": 1.5500564053066184e-05, "epoch": 13.233752620545074, "percentage": 66.17, "elapsed_time": "1:16:18", "remaining_time": "0:39:01", "throughput": 3595.91, "total_tokens": 16465128}
|
| 5064 |
+
{"current_steps": 25255, "total_steps": 38160, "loss": 0.4263, "lr": 1.5489988381998743e-05, "epoch": 13.23637316561845, "percentage": 66.18, "elapsed_time": "1:16:19", "remaining_time": "0:39:00", "throughput": 3596.11, "total_tokens": 16469320}
|
| 5065 |
+
{"current_steps": 25260, "total_steps": 38160, "loss": 0.6094, "lr": 1.5479414700317237e-05, "epoch": 13.238993710691824, "percentage": 66.19, "elapsed_time": "1:16:20", "remaining_time": "0:38:59", "throughput": 3596.09, "total_tokens": 16472296}
|
| 5066 |
+
{"current_steps": 25265, "total_steps": 38160, "loss": 0.5335, "lr": 1.546884301023356e-05, "epoch": 13.2416142557652, "percentage": 66.21, "elapsed_time": "1:16:21", "remaining_time": "0:38:58", "throughput": 3596.04, "total_tokens": 16475240}
|
| 5067 |
+
{"current_steps": 25270, "total_steps": 38160, "loss": 0.5186, "lr": 1.5458273313959186e-05, "epoch": 13.244234800838575, "percentage": 66.22, "elapsed_time": "1:16:22", "remaining_time": "0:38:57", "throughput": 3595.96, "total_tokens": 16477736}
|
| 5068 |
+
{"current_steps": 25275, "total_steps": 38160, "loss": 0.5394, "lr": 1.5447705613705177e-05, "epoch": 13.24685534591195, "percentage": 66.23, "elapsed_time": "1:16:23", "remaining_time": "0:38:56", "throughput": 3596.0, "total_tokens": 16480776}
|
| 5069 |
+
{"current_steps": 25280, "total_steps": 38160, "loss": 0.4599, "lr": 1.5437139911682176e-05, "epoch": 13.249475890985325, "percentage": 66.25, "elapsed_time": "1:16:24", "remaining_time": "0:38:55", "throughput": 3596.08, "total_tokens": 16484584}
|
| 5070 |
+
{"current_steps": 25285, "total_steps": 38160, "loss": 0.4767, "lr": 1.54265762101004e-05, "epoch": 13.2520964360587, "percentage": 66.26, "elapsed_time": "1:16:24", "remaining_time": "0:38:54", "throughput": 3596.05, "total_tokens": 16487400}
|
| 5071 |
+
{"current_steps": 25290, "total_steps": 38160, "loss": 0.4506, "lr": 1.5416014511169663e-05, "epoch": 13.254716981132075, "percentage": 66.27, "elapsed_time": "1:16:25", "remaining_time": "0:38:53", "throughput": 3596.0, "total_tokens": 16490024}
|
| 5072 |
+
{"current_steps": 25295, "total_steps": 38160, "loss": 0.4746, "lr": 1.5405454817099363e-05, "epoch": 13.25733752620545, "percentage": 66.29, "elapsed_time": "1:16:26", "remaining_time": "0:38:52", "throughput": 3596.0, "total_tokens": 16493064}
|
| 5073 |
+
{"current_steps": 25300, "total_steps": 38160, "loss": 0.4043, "lr": 1.5394897130098446e-05, "epoch": 13.259958071278826, "percentage": 66.3, "elapsed_time": "1:16:27", "remaining_time": "0:38:51", "throughput": 3595.97, "total_tokens": 16496136}
|
| 5074 |
+
{"current_steps": 25305, "total_steps": 38160, "loss": 0.4148, "lr": 1.5384341452375465e-05, "epoch": 13.2625786163522, "percentage": 66.31, "elapsed_time": "1:16:28", "remaining_time": "0:38:50", "throughput": 3596.14, "total_tokens": 16500328}
|
| 5075 |
+
{"current_steps": 25310, "total_steps": 38160, "loss": 0.5072, "lr": 1.5373787786138556e-05, "epoch": 13.265199161425576, "percentage": 66.33, "elapsed_time": "1:16:29", "remaining_time": "0:38:49", "throughput": 3596.2, "total_tokens": 16503688}
|
| 5076 |
+
{"current_steps": 25315, "total_steps": 38160, "loss": 0.5149, "lr": 1.5363236133595416e-05, "epoch": 13.267819706498951, "percentage": 66.34, "elapsed_time": "1:16:30", "remaining_time": "0:38:49", "throughput": 3596.24, "total_tokens": 16507112}
|
| 5077 |
+
{"current_steps": 25320, "total_steps": 38160, "loss": 0.5093, "lr": 1.5352686496953344e-05, "epoch": 13.270440251572326, "percentage": 66.35, "elapsed_time": "1:16:31", "remaining_time": "0:38:48", "throughput": 3596.28, "total_tokens": 16510920}
|
| 5078 |
+
{"current_steps": 25325, "total_steps": 38160, "loss": 0.5183, "lr": 1.534213887841919e-05, "epoch": 13.273060796645701, "percentage": 66.37, "elapsed_time": "1:16:31", "remaining_time": "0:38:47", "throughput": 3596.25, "total_tokens": 16513736}
|
| 5079 |
+
{"current_steps": 25330, "total_steps": 38160, "loss": 0.5203, "lr": 1.5331593280199395e-05, "epoch": 13.275681341719078, "percentage": 66.38, "elapsed_time": "1:16:32", "remaining_time": "0:38:46", "throughput": 3596.19, "total_tokens": 16516744}
|
| 5080 |
+
{"current_steps": 25335, "total_steps": 38160, "loss": 0.5434, "lr": 1.532104970449999e-05, "epoch": 13.278301886792454, "percentage": 66.39, "elapsed_time": "1:16:33", "remaining_time": "0:38:45", "throughput": 3596.2, "total_tokens": 16519976}
|
| 5081 |
+
{"current_steps": 25340, "total_steps": 38160, "loss": 0.4928, "lr": 1.5310508153526555e-05, "epoch": 13.280922431865829, "percentage": 66.4, "elapsed_time": "1:16:34", "remaining_time": "0:38:44", "throughput": 3596.23, "total_tokens": 16523112}
|
| 5082 |
+
{"current_steps": 25345, "total_steps": 38160, "loss": 0.5937, "lr": 1.5299968629484276e-05, "epoch": 13.283542976939204, "percentage": 66.42, "elapsed_time": "1:16:35", "remaining_time": "0:38:43", "throughput": 3596.21, "total_tokens": 16526376}
|
| 5083 |
+
{"current_steps": 25350, "total_steps": 38160, "loss": 0.4714, "lr": 1.5289431134577904e-05, "epoch": 13.286163522012579, "percentage": 66.43, "elapsed_time": "1:16:37", "remaining_time": "0:38:43", "throughput": 3596.42, "total_tokens": 16533160}
|
| 5084 |
+
{"current_steps": 25355, "total_steps": 38160, "loss": 0.413, "lr": 1.527889567101174e-05, "epoch": 13.288784067085954, "percentage": 66.44, "elapsed_time": "1:16:38", "remaining_time": "0:38:42", "throughput": 3596.47, "total_tokens": 16536680}
|
| 5085 |
+
{"current_steps": 25360, "total_steps": 38160, "loss": 0.4372, "lr": 1.5268362240989707e-05, "epoch": 13.29140461215933, "percentage": 66.46, "elapsed_time": "1:16:38", "remaining_time": "0:38:41", "throughput": 3596.41, "total_tokens": 16539656}
|
| 5086 |
+
{"current_steps": 25365, "total_steps": 38160, "loss": 0.4309, "lr": 1.525783084671526e-05, "epoch": 13.294025157232705, "percentage": 66.47, "elapsed_time": "1:16:39", "remaining_time": "0:38:40", "throughput": 3596.34, "total_tokens": 16542376}
|
| 5087 |
+
{"current_steps": 25370, "total_steps": 38160, "loss": 0.4442, "lr": 1.5247301490391458e-05, "epoch": 13.29664570230608, "percentage": 66.48, "elapsed_time": "1:16:40", "remaining_time": "0:38:39", "throughput": 3596.34, "total_tokens": 16545352}
|
| 5088 |
+
{"current_steps": 25375, "total_steps": 38160, "loss": 0.5632, "lr": 1.5236774174220922e-05, "epoch": 13.299266247379455, "percentage": 66.5, "elapsed_time": "1:16:41", "remaining_time": "0:38:38", "throughput": 3596.36, "total_tokens": 16548424}
|
| 5089 |
+
{"current_steps": 25380, "total_steps": 38160, "loss": 0.5002, "lr": 1.5226248900405852e-05, "epoch": 13.30188679245283, "percentage": 66.51, "elapsed_time": "1:16:42", "remaining_time": "0:38:37", "throughput": 3596.33, "total_tokens": 16551592}
|
| 5090 |
+
{"current_steps": 25385, "total_steps": 38160, "loss": 0.3726, "lr": 1.5215725671147996e-05, "epoch": 13.304507337526205, "percentage": 66.52, "elapsed_time": "1:16:43", "remaining_time": "0:38:36", "throughput": 3596.33, "total_tokens": 16554568}
|
| 5091 |
+
{"current_steps": 25390, "total_steps": 38160, "loss": 0.4632, "lr": 1.5205204488648709e-05, "epoch": 13.30712788259958, "percentage": 66.54, "elapsed_time": "1:16:44", "remaining_time": "0:38:35", "throughput": 3596.24, "total_tokens": 16557416}
|
| 5092 |
+
{"current_steps": 25395, "total_steps": 38160, "loss": 0.495, "lr": 1.5194685355108893e-05, "epoch": 13.309748427672956, "percentage": 66.55, "elapsed_time": "1:16:44", "remaining_time": "0:38:34", "throughput": 3596.21, "total_tokens": 16560424}
|
| 5093 |
+
{"current_steps": 25400, "total_steps": 38160, "loss": 0.4192, "lr": 1.518416827272904e-05, "epoch": 13.31236897274633, "percentage": 66.56, "elapsed_time": "1:16:45", "remaining_time": "0:38:33", "throughput": 3596.26, "total_tokens": 16563880}
|
| 5094 |
+
{"current_steps": 25405, "total_steps": 38160, "loss": 0.3931, "lr": 1.5173653243709207e-05, "epoch": 13.314989517819706, "percentage": 66.57, "elapsed_time": "1:16:46", "remaining_time": "0:38:32", "throughput": 3596.32, "total_tokens": 16567432}
|
| 5095 |
+
{"current_steps": 25410, "total_steps": 38160, "loss": 0.565, "lr": 1.5163140270249e-05, "epoch": 13.317610062893081, "percentage": 66.59, "elapsed_time": "1:16:47", "remaining_time": "0:38:32", "throughput": 3596.36, "total_tokens": 16570888}
|
| 5096 |
+
{"current_steps": 25415, "total_steps": 38160, "loss": 0.4958, "lr": 1.5152629354547626e-05, "epoch": 13.320230607966456, "percentage": 66.6, "elapsed_time": "1:16:48", "remaining_time": "0:38:31", "throughput": 3596.23, "total_tokens": 16573384}
|
| 5097 |
+
{"current_steps": 25420, "total_steps": 38160, "loss": 0.5051, "lr": 1.5142120498803842e-05, "epoch": 13.322851153039831, "percentage": 66.61, "elapsed_time": "1:16:49", "remaining_time": "0:38:30", "throughput": 3596.26, "total_tokens": 16576584}
|
| 5098 |
+
{"current_steps": 25425, "total_steps": 38160, "loss": 0.5246, "lr": 1.5131613705215991e-05, "epoch": 13.325471698113208, "percentage": 66.63, "elapsed_time": "1:16:50", "remaining_time": "0:38:29", "throughput": 3596.36, "total_tokens": 16580584}
|
| 5099 |
+
{"current_steps": 25430, "total_steps": 38160, "loss": 0.5637, "lr": 1.5121108975981965e-05, "epoch": 13.328092243186584, "percentage": 66.64, "elapsed_time": "1:16:51", "remaining_time": "0:38:28", "throughput": 3596.41, "total_tokens": 16583848}
|
| 5100 |
+
{"current_steps": 25435, "total_steps": 38160, "loss": 0.3626, "lr": 1.5110606313299248e-05, "epoch": 13.330712788259959, "percentage": 66.65, "elapsed_time": "1:16:52", "remaining_time": "0:38:27", "throughput": 3596.43, "total_tokens": 16587272}
|
| 5101 |
+
{"current_steps": 25440, "total_steps": 38160, "loss": 0.4424, "lr": 1.5100105719364855e-05, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "1:16:52", "remaining_time": "0:38:26", "throughput": 3596.42, "total_tokens": 16590088}
|
| 5102 |
+
{"current_steps": 25445, "total_steps": 38160, "loss": 0.4705, "lr": 1.50896071963754e-05, "epoch": 13.335953878406709, "percentage": 66.68, "elapsed_time": "1:16:53", "remaining_time": "0:38:25", "throughput": 3596.51, "total_tokens": 16593320}
|
| 5103 |
+
{"current_steps": 25450, "total_steps": 38160, "loss": 0.4888, "lr": 1.5079110746527055e-05, "epoch": 13.338574423480084, "percentage": 66.69, "elapsed_time": "1:16:54", "remaining_time": "0:38:24", "throughput": 3596.5, "total_tokens": 16596328}
|
| 5104 |
+
{"current_steps": 25455, "total_steps": 38160, "loss": 0.6497, "lr": 1.5068616372015565e-05, "epoch": 13.34119496855346, "percentage": 66.71, "elapsed_time": "1:16:55", "remaining_time": "0:38:23", "throughput": 3596.46, "total_tokens": 16599080}
|
| 5105 |
+
{"current_steps": 25460, "total_steps": 38160, "loss": 0.4406, "lr": 1.5058124075036223e-05, "epoch": 13.343815513626835, "percentage": 66.72, "elapsed_time": "1:16:56", "remaining_time": "0:38:22", "throughput": 3596.49, "total_tokens": 16602056}
|
| 5106 |
+
{"current_steps": 25465, "total_steps": 38160, "loss": 0.5165, "lr": 1.5047633857783908e-05, "epoch": 13.34643605870021, "percentage": 66.73, "elapsed_time": "1:16:57", "remaining_time": "0:38:21", "throughput": 3596.64, "total_tokens": 16606216}
|
| 5107 |
+
{"current_steps": 25470, "total_steps": 38160, "loss": 0.4232, "lr": 1.5037145722453044e-05, "epoch": 13.349056603773585, "percentage": 66.75, "elapsed_time": "1:16:58", "remaining_time": "0:38:20", "throughput": 3596.57, "total_tokens": 16609064}
|
| 5108 |
+
{"current_steps": 25475, "total_steps": 38160, "loss": 0.5911, "lr": 1.5026659671237624e-05, "epoch": 13.35167714884696, "percentage": 66.76, "elapsed_time": "1:16:58", "remaining_time": "0:38:19", "throughput": 3596.43, "total_tokens": 16611560}
|
| 5109 |
+
{"current_steps": 25480, "total_steps": 38160, "loss": 0.4133, "lr": 1.501617570633123e-05, "epoch": 13.354297693920335, "percentage": 66.77, "elapsed_time": "1:16:59", "remaining_time": "0:38:18", "throughput": 3596.41, "total_tokens": 16614408}
|
| 5110 |
+
{"current_steps": 25485, "total_steps": 38160, "loss": 0.4757, "lr": 1.500569382992697e-05, "epoch": 13.35691823899371, "percentage": 66.78, "elapsed_time": "1:17:00", "remaining_time": "0:38:18", "throughput": 3596.37, "total_tokens": 16617256}
|
| 5111 |
+
{"current_steps": 25490, "total_steps": 38160, "loss": 0.4935, "lr": 1.4995214044217547e-05, "epoch": 13.359538784067086, "percentage": 66.8, "elapsed_time": "1:17:01", "remaining_time": "0:38:17", "throughput": 3596.36, "total_tokens": 16620232}
|
| 5112 |
+
{"current_steps": 25495, "total_steps": 38160, "loss": 0.5013, "lr": 1.49847363513952e-05, "epoch": 13.36215932914046, "percentage": 66.81, "elapsed_time": "1:17:02", "remaining_time": "0:38:16", "throughput": 3596.35, "total_tokens": 16623912}
|
| 5113 |
+
{"current_steps": 25500, "total_steps": 38160, "loss": 0.6071, "lr": 1.4974260753651748e-05, "epoch": 13.364779874213836, "percentage": 66.82, "elapsed_time": "1:17:03", "remaining_time": "0:38:15", "throughput": 3596.22, "total_tokens": 16626440}
|
| 5114 |
+
{"current_steps": 25505, "total_steps": 38160, "loss": 0.6389, "lr": 1.4963787253178568e-05, "epoch": 13.367400419287211, "percentage": 66.84, "elapsed_time": "1:17:04", "remaining_time": "0:38:14", "throughput": 3596.34, "total_tokens": 16630440}
|
| 5115 |
+
{"current_steps": 25510, "total_steps": 38160, "loss": 0.4768, "lr": 1.4953315852166594e-05, "epoch": 13.370020964360586, "percentage": 66.85, "elapsed_time": "1:17:05", "remaining_time": "0:38:13", "throughput": 3596.31, "total_tokens": 16633352}
|
| 5116 |
+
{"current_steps": 25515, "total_steps": 38160, "loss": 0.5182, "lr": 1.4942846552806322e-05, "epoch": 13.372641509433961, "percentage": 66.86, "elapsed_time": "1:17:05", "remaining_time": "0:38:12", "throughput": 3596.31, "total_tokens": 16636392}
|
| 5117 |
+
{"current_steps": 25520, "total_steps": 38160, "loss": 0.4672, "lr": 1.4932379357287829e-05, "epoch": 13.375262054507338, "percentage": 66.88, "elapsed_time": "1:17:06", "remaining_time": "0:38:11", "throughput": 3596.41, "total_tokens": 16639688}
|
| 5118 |
+
{"current_steps": 25525, "total_steps": 38160, "loss": 0.4371, "lr": 1.49219142678007e-05, "epoch": 13.377882599580714, "percentage": 66.89, "elapsed_time": "1:17:07", "remaining_time": "0:38:10", "throughput": 3596.53, "total_tokens": 16643176}
|
| 5119 |
+
{"current_steps": 25530, "total_steps": 38160, "loss": 0.4863, "lr": 1.4911451286534128e-05, "epoch": 13.380503144654089, "percentage": 66.9, "elapsed_time": "1:17:08", "remaining_time": "0:38:09", "throughput": 3596.6, "total_tokens": 16646952}
|
| 5120 |
+
{"current_steps": 25535, "total_steps": 38160, "loss": 0.4659, "lr": 1.490099041567686e-05, "epoch": 13.383123689727464, "percentage": 66.92, "elapsed_time": "1:17:09", "remaining_time": "0:38:08", "throughput": 3596.72, "total_tokens": 16650792}
|
| 5121 |
+
{"current_steps": 25540, "total_steps": 38160, "loss": 0.4253, "lr": 1.4890531657417176e-05, "epoch": 13.385744234800839, "percentage": 66.93, "elapsed_time": "1:17:10", "remaining_time": "0:38:07", "throughput": 3596.84, "total_tokens": 16654568}
|
| 5122 |
+
{"current_steps": 25545, "total_steps": 38160, "loss": 0.5565, "lr": 1.4880075013942938e-05, "epoch": 13.388364779874214, "percentage": 66.94, "elapsed_time": "1:17:11", "remaining_time": "0:38:07", "throughput": 3596.86, "total_tokens": 16657608}
|
| 5123 |
+
{"current_steps": 25550, "total_steps": 38160, "loss": 0.5105, "lr": 1.4869620487441558e-05, "epoch": 13.39098532494759, "percentage": 66.95, "elapsed_time": "1:17:11", "remaining_time": "0:38:06", "throughput": 3596.82, "total_tokens": 16660360}
|
| 5124 |
+
{"current_steps": 25555, "total_steps": 38160, "loss": 0.3947, "lr": 1.4859168080099988e-05, "epoch": 13.393605870020965, "percentage": 66.97, "elapsed_time": "1:17:12", "remaining_time": "0:38:05", "throughput": 3596.79, "total_tokens": 16663048}
|
| 5125 |
+
{"current_steps": 25560, "total_steps": 38160, "loss": 0.4179, "lr": 1.484871779410477e-05, "epoch": 13.39622641509434, "percentage": 66.98, "elapsed_time": "1:17:13", "remaining_time": "0:38:04", "throughput": 3596.97, "total_tokens": 16667080}
|
| 5126 |
+
{"current_steps": 25565, "total_steps": 38160, "loss": 0.4835, "lr": 1.4838269631641971e-05, "epoch": 13.398846960167715, "percentage": 66.99, "elapsed_time": "1:17:14", "remaining_time": "0:38:03", "throughput": 3596.91, "total_tokens": 16669608}
|
| 5127 |
+
{"current_steps": 25570, "total_steps": 38160, "loss": 0.4359, "lr": 1.482782359489724e-05, "epoch": 13.40146750524109, "percentage": 67.01, "elapsed_time": "1:17:16", "remaining_time": "0:38:02", "throughput": 3597.27, "total_tokens": 16676968}
|
| 5128 |
+
{"current_steps": 25575, "total_steps": 38160, "loss": 0.4922, "lr": 1.4817379686055754e-05, "epoch": 13.404088050314465, "percentage": 67.02, "elapsed_time": "1:17:16", "remaining_time": "0:38:01", "throughput": 3597.37, "total_tokens": 16680552}
|
| 5129 |
+
{"current_steps": 25580, "total_steps": 38160, "loss": 0.4341, "lr": 1.4806937907302282e-05, "epoch": 13.40670859538784, "percentage": 67.03, "elapsed_time": "1:17:17", "remaining_time": "0:38:00", "throughput": 3597.47, "total_tokens": 16683880}
|
| 5130 |
+
{"current_steps": 25585, "total_steps": 38160, "loss": 0.5765, "lr": 1.4796498260821103e-05, "epoch": 13.409329140461216, "percentage": 67.05, "elapsed_time": "1:17:18", "remaining_time": "0:37:59", "throughput": 3597.37, "total_tokens": 16686312}
|
| 5131 |
+
{"current_steps": 25590, "total_steps": 38160, "loss": 0.4962, "lr": 1.4786060748796076e-05, "epoch": 13.41194968553459, "percentage": 67.06, "elapsed_time": "1:17:19", "remaining_time": "0:37:58", "throughput": 3597.28, "total_tokens": 16688776}
|
| 5132 |
+
{"current_steps": 25595, "total_steps": 38160, "loss": 0.3472, "lr": 1.4775625373410617e-05, "epoch": 13.414570230607966, "percentage": 67.07, "elapsed_time": "1:17:20", "remaining_time": "0:37:57", "throughput": 3597.33, "total_tokens": 16692104}
|
| 5133 |
+
{"current_steps": 25600, "total_steps": 38160, "loss": 0.4745, "lr": 1.4765192136847685e-05, "epoch": 13.417190775681341, "percentage": 67.09, "elapsed_time": "1:17:20", "remaining_time": "0:37:56", "throughput": 3597.21, "total_tokens": 16694600}
|
| 5134 |
+
{"current_steps": 25605, "total_steps": 38160, "loss": 0.4503, "lr": 1.47547610412898e-05, "epoch": 13.419811320754716, "percentage": 67.1, "elapsed_time": "1:17:21", "remaining_time": "0:37:56", "throughput": 3597.09, "total_tokens": 16697384}
|
| 5135 |
+
{"current_steps": 25610, "total_steps": 38160, "loss": 0.4877, "lr": 1.4744332088919011e-05, "epoch": 13.422431865828091, "percentage": 67.11, "elapsed_time": "1:17:22", "remaining_time": "0:37:55", "throughput": 3597.2, "total_tokens": 16700776}
|
| 5136 |
+
{"current_steps": 25615, "total_steps": 38160, "loss": 0.4622, "lr": 1.4733905281916954e-05, "epoch": 13.425052410901468, "percentage": 67.13, "elapsed_time": "1:17:23", "remaining_time": "0:37:54", "throughput": 3597.13, "total_tokens": 16703272}
|
| 5137 |
+
{"current_steps": 25620, "total_steps": 38160, "loss": 0.371, "lr": 1.472348062246478e-05, "epoch": 13.427672955974844, "percentage": 67.14, "elapsed_time": "1:17:24", "remaining_time": "0:37:53", "throughput": 3596.85, "total_tokens": 16706440}
|
| 5138 |
+
{"current_steps": 25625, "total_steps": 38160, "loss": 0.4908, "lr": 1.4713058112743228e-05, "epoch": 13.430293501048219, "percentage": 67.15, "elapsed_time": "1:17:25", "remaining_time": "0:37:52", "throughput": 3596.85, "total_tokens": 16709256}
|
| 5139 |
+
{"current_steps": 25630, "total_steps": 38160, "loss": 0.4289, "lr": 1.4702637754932553e-05, "epoch": 13.432914046121594, "percentage": 67.16, "elapsed_time": "1:17:26", "remaining_time": "0:37:51", "throughput": 3596.96, "total_tokens": 16712712}
|
| 5140 |
+
{"current_steps": 25635, "total_steps": 38160, "loss": 0.4529, "lr": 1.4692219551212587e-05, "epoch": 13.435534591194969, "percentage": 67.18, "elapsed_time": "1:17:27", "remaining_time": "0:37:50", "throughput": 3596.94, "total_tokens": 16715464}
|
| 5141 |
+
{"current_steps": 25640, "total_steps": 38160, "loss": 0.6656, "lr": 1.4681803503762695e-05, "epoch": 13.438155136268344, "percentage": 67.19, "elapsed_time": "1:17:28", "remaining_time": "0:37:49", "throughput": 3597.08, "total_tokens": 16719304}
|
| 5142 |
+
{"current_steps": 25645, "total_steps": 38160, "loss": 0.4883, "lr": 1.4671389614761788e-05, "epoch": 13.44077568134172, "percentage": 67.2, "elapsed_time": "1:17:28", "remaining_time": "0:37:48", "throughput": 3597.12, "total_tokens": 16722760}
|
| 5143 |
+
{"current_steps": 25650, "total_steps": 38160, "loss": 0.5622, "lr": 1.4660977886388339e-05, "epoch": 13.443396226415095, "percentage": 67.22, "elapsed_time": "1:17:29", "remaining_time": "0:37:47", "throughput": 3597.17, "total_tokens": 16726152}
|
| 5144 |
+
{"current_steps": 25655, "total_steps": 38160, "loss": 0.4832, "lr": 1.4650568320820362e-05, "epoch": 13.44601677148847, "percentage": 67.23, "elapsed_time": "1:17:30", "remaining_time": "0:37:46", "throughput": 3597.11, "total_tokens": 16728808}
|
| 5145 |
+
{"current_steps": 25660, "total_steps": 38160, "loss": 0.4588, "lr": 1.4640160920235423e-05, "epoch": 13.448637316561845, "percentage": 67.24, "elapsed_time": "1:17:31", "remaining_time": "0:37:45", "throughput": 3596.99, "total_tokens": 16731240}
|
| 5146 |
+
{"current_steps": 25665, "total_steps": 38160, "loss": 0.5985, "lr": 1.4629755686810631e-05, "epoch": 13.45125786163522, "percentage": 67.26, "elapsed_time": "1:17:32", "remaining_time": "0:37:44", "throughput": 3597.16, "total_tokens": 16735112}
|
| 5147 |
+
{"current_steps": 25670, "total_steps": 38160, "loss": 0.4903, "lr": 1.4619352622722632e-05, "epoch": 13.453878406708595, "percentage": 67.27, "elapsed_time": "1:17:33", "remaining_time": "0:37:44", "throughput": 3597.23, "total_tokens": 16738568}
|
| 5148 |
+
{"current_steps": 25675, "total_steps": 38160, "loss": 0.4562, "lr": 1.4608951730147635e-05, "epoch": 13.45649895178197, "percentage": 67.28, "elapsed_time": "1:17:34", "remaining_time": "0:37:43", "throughput": 3597.29, "total_tokens": 16742056}
|
| 5149 |
+
{"current_steps": 25680, "total_steps": 38160, "loss": 0.4653, "lr": 1.4598553011261395e-05, "epoch": 13.459119496855346, "percentage": 67.3, "elapsed_time": "1:17:34", "remaining_time": "0:37:42", "throughput": 3597.16, "total_tokens": 16744616}
|
| 5150 |
+
{"current_steps": 25685, "total_steps": 38160, "loss": 0.5476, "lr": 1.4588156468239192e-05, "epoch": 13.46174004192872, "percentage": 67.31, "elapsed_time": "1:17:35", "remaining_time": "0:37:41", "throughput": 3597.09, "total_tokens": 16747464}
|
| 5151 |
+
{"current_steps": 25690, "total_steps": 38160, "loss": 0.4568, "lr": 1.4577762103255882e-05, "epoch": 13.464360587002096, "percentage": 67.32, "elapsed_time": "1:17:36", "remaining_time": "0:37:40", "throughput": 3597.21, "total_tokens": 16752104}
|
| 5152 |
+
{"current_steps": 25695, "total_steps": 38160, "loss": 0.3733, "lr": 1.4567369918485828e-05, "epoch": 13.466981132075471, "percentage": 67.33, "elapsed_time": "1:17:37", "remaining_time": "0:37:39", "throughput": 3597.29, "total_tokens": 16755720}
|
| 5153 |
+
{"current_steps": 25700, "total_steps": 38160, "loss": 0.5691, "lr": 1.4556979916102976e-05, "epoch": 13.469601677148846, "percentage": 67.35, "elapsed_time": "1:17:38", "remaining_time": "0:37:38", "throughput": 3597.25, "total_tokens": 16758600}
|
| 5154 |
+
{"current_steps": 25705, "total_steps": 38160, "loss": 0.4609, "lr": 1.454659209828077e-05, "epoch": 13.472222222222221, "percentage": 67.36, "elapsed_time": "1:17:39", "remaining_time": "0:37:37", "throughput": 3597.19, "total_tokens": 16761352}
|
| 5155 |
+
{"current_steps": 25710, "total_steps": 38160, "loss": 0.5866, "lr": 1.4536206467192243e-05, "epoch": 13.474842767295598, "percentage": 67.37, "elapsed_time": "1:17:40", "remaining_time": "0:37:36", "throughput": 3597.33, "total_tokens": 16765512}
|
| 5156 |
+
{"current_steps": 25715, "total_steps": 38160, "loss": 0.606, "lr": 1.4525823025009944e-05, "epoch": 13.477463312368974, "percentage": 67.39, "elapsed_time": "1:17:41", "remaining_time": "0:37:35", "throughput": 3597.39, "total_tokens": 16769064}
|
| 5157 |
+
{"current_steps": 25720, "total_steps": 38160, "loss": 0.5785, "lr": 1.4515441773905983e-05, "epoch": 13.480083857442349, "percentage": 67.4, "elapsed_time": "1:17:42", "remaining_time": "0:37:34", "throughput": 3597.46, "total_tokens": 16772232}
|
| 5158 |
+
{"current_steps": 25725, "total_steps": 38160, "loss": 0.4049, "lr": 1.4505062716051988e-05, "epoch": 13.482704402515724, "percentage": 67.41, "elapsed_time": "1:17:43", "remaining_time": "0:37:34", "throughput": 3597.66, "total_tokens": 16776616}
|
| 5159 |
+
{"current_steps": 25730, "total_steps": 38160, "loss": 0.4728, "lr": 1.4494685853619133e-05, "epoch": 13.485324947589099, "percentage": 67.43, "elapsed_time": "1:17:44", "remaining_time": "0:37:33", "throughput": 3597.65, "total_tokens": 16779496}
|
| 5160 |
+
{"current_steps": 25735, "total_steps": 38160, "loss": 0.3826, "lr": 1.4484311188778143e-05, "epoch": 13.487945492662474, "percentage": 67.44, "elapsed_time": "1:17:44", "remaining_time": "0:37:32", "throughput": 3597.75, "total_tokens": 16783368}
|
| 5161 |
+
{"current_steps": 25740, "total_steps": 38160, "loss": 0.5224, "lr": 1.4473938723699285e-05, "epoch": 13.49056603773585, "percentage": 67.45, "elapsed_time": "1:17:45", "remaining_time": "0:37:31", "throughput": 3597.64, "total_tokens": 16785928}
|
| 5162 |
+
{"current_steps": 25745, "total_steps": 38160, "loss": 0.4737, "lr": 1.4463568460552368e-05, "epoch": 13.493186582809225, "percentage": 67.47, "elapsed_time": "1:17:46", "remaining_time": "0:37:30", "throughput": 3597.7, "total_tokens": 16789320}
|
| 5163 |
+
{"current_steps": 25750, "total_steps": 38160, "loss": 0.5147, "lr": 1.4453200401506723e-05, "epoch": 13.4958071278826, "percentage": 67.48, "elapsed_time": "1:17:47", "remaining_time": "0:37:29", "throughput": 3597.92, "total_tokens": 16793928}
|
| 5164 |
+
{"current_steps": 25755, "total_steps": 38160, "loss": 0.5254, "lr": 1.4442834548731218e-05, "epoch": 13.498427672955975, "percentage": 67.49, "elapsed_time": "1:17:48", "remaining_time": "0:37:28", "throughput": 3597.91, "total_tokens": 16796872}
|
| 5165 |
+
{"current_steps": 25760, "total_steps": 38160, "loss": 0.4091, "lr": 1.4432470904394285e-05, "epoch": 13.50104821802935, "percentage": 67.51, "elapsed_time": "1:17:49", "remaining_time": "0:37:27", "throughput": 3598.05, "total_tokens": 16800552}
|
| 5166 |
+
{"current_steps": 25765, "total_steps": 38160, "loss": 0.4516, "lr": 1.4422109470663878e-05, "epoch": 13.503668763102725, "percentage": 67.52, "elapsed_time": "1:17:50", "remaining_time": "0:37:26", "throughput": 3598.14, "total_tokens": 16803880}
|
| 5167 |
+
{"current_steps": 25770, "total_steps": 38160, "loss": 0.473, "lr": 1.441175024970749e-05, "epoch": 13.5062893081761, "percentage": 67.53, "elapsed_time": "1:17:51", "remaining_time": "0:37:25", "throughput": 3598.03, "total_tokens": 16806600}
|
| 5168 |
+
{"current_steps": 25775, "total_steps": 38160, "loss": 0.4495, "lr": 1.4401393243692163e-05, "epoch": 13.508909853249476, "percentage": 67.54, "elapsed_time": "1:17:51", "remaining_time": "0:37:24", "throughput": 3598.04, "total_tokens": 16809576}
|
| 5169 |
+
{"current_steps": 25780, "total_steps": 38160, "loss": 0.5392, "lr": 1.4391038454784455e-05, "epoch": 13.51153039832285, "percentage": 67.56, "elapsed_time": "1:17:52", "remaining_time": "0:37:23", "throughput": 3598.11, "total_tokens": 16812776}
|
| 5170 |
+
{"current_steps": 25785, "total_steps": 38160, "loss": 0.4746, "lr": 1.4380685885150458e-05, "epoch": 13.514150943396226, "percentage": 67.57, "elapsed_time": "1:17:53", "remaining_time": "0:37:22", "throughput": 3598.04, "total_tokens": 16815496}
|
| 5171 |
+
{"current_steps": 25790, "total_steps": 38160, "loss": 0.4602, "lr": 1.437033553695582e-05, "epoch": 13.516771488469601, "percentage": 67.58, "elapsed_time": "1:17:54", "remaining_time": "0:37:22", "throughput": 3598.12, "total_tokens": 16818984}
|
| 5172 |
+
{"current_steps": 25795, "total_steps": 38160, "loss": 0.3386, "lr": 1.4359987412365721e-05, "epoch": 13.519392033542976, "percentage": 67.6, "elapsed_time": "1:17:55", "remaining_time": "0:37:21", "throughput": 3598.43, "total_tokens": 16825096}
|
| 5173 |
+
{"current_steps": 25800, "total_steps": 38160, "loss": 0.5552, "lr": 1.4349641513544876e-05, "epoch": 13.522012578616351, "percentage": 67.61, "elapsed_time": "1:17:56", "remaining_time": "0:37:20", "throughput": 3598.4, "total_tokens": 16827880}
|
| 5174 |
+
{"current_steps": 25805, "total_steps": 38160, "loss": 0.5496, "lr": 1.4339297842657523e-05, "epoch": 13.524633123689728, "percentage": 67.62, "elapsed_time": "1:17:57", "remaining_time": "0:37:19", "throughput": 3598.37, "total_tokens": 16830504}
|
| 5175 |
+
{"current_steps": 25810, "total_steps": 38160, "loss": 0.4408, "lr": 1.4328956401867427e-05, "epoch": 13.527253668763104, "percentage": 67.64, "elapsed_time": "1:17:58", "remaining_time": "0:37:18", "throughput": 3598.25, "total_tokens": 16833128}
|
| 5176 |
+
{"current_steps": 25815, "total_steps": 38160, "loss": 0.5072, "lr": 1.4318617193337908e-05, "epoch": 13.529874213836479, "percentage": 67.65, "elapsed_time": "1:17:58", "remaining_time": "0:37:17", "throughput": 3598.17, "total_tokens": 16835592}
|
| 5177 |
+
{"current_steps": 25820, "total_steps": 38160, "loss": 0.5631, "lr": 1.4308280219231812e-05, "epoch": 13.532494758909854, "percentage": 67.66, "elapsed_time": "1:17:59", "remaining_time": "0:37:16", "throughput": 3598.32, "total_tokens": 16839752}
|
| 5178 |
+
{"current_steps": 25825, "total_steps": 38160, "loss": 0.5642, "lr": 1.4297945481711527e-05, "epoch": 13.535115303983229, "percentage": 67.68, "elapsed_time": "1:18:00", "remaining_time": "0:37:15", "throughput": 3598.24, "total_tokens": 16842568}
|
| 5179 |
+
{"current_steps": 25830, "total_steps": 38160, "loss": 0.4716, "lr": 1.4287612982938946e-05, "epoch": 13.537735849056604, "percentage": 67.69, "elapsed_time": "1:18:01", "remaining_time": "0:37:14", "throughput": 3598.14, "total_tokens": 16845320}
|
| 5180 |
+
{"current_steps": 25835, "total_steps": 38160, "loss": 0.4838, "lr": 1.4277282725075523e-05, "epoch": 13.54035639412998, "percentage": 67.7, "elapsed_time": "1:18:02", "remaining_time": "0:37:13", "throughput": 3598.15, "total_tokens": 16848552}
|
| 5181 |
+
{"current_steps": 25840, "total_steps": 38160, "loss": 0.5794, "lr": 1.426695471028221e-05, "epoch": 13.542976939203355, "percentage": 67.71, "elapsed_time": "1:18:03", "remaining_time": "0:37:13", "throughput": 3598.42, "total_tokens": 16853608}
|
| 5182 |
+
{"current_steps": 25845, "total_steps": 38160, "loss": 0.4541, "lr": 1.4256628940719524e-05, "epoch": 13.54559748427673, "percentage": 67.73, "elapsed_time": "1:18:04", "remaining_time": "0:37:12", "throughput": 3598.4, "total_tokens": 16856648}
|
| 5183 |
+
{"current_steps": 25850, "total_steps": 38160, "loss": 0.5183, "lr": 1.42463054185475e-05, "epoch": 13.548218029350105, "percentage": 67.74, "elapsed_time": "1:18:05", "remaining_time": "0:37:11", "throughput": 3598.6, "total_tokens": 16861864}
|
| 5184 |
+
{"current_steps": 25855, "total_steps": 38160, "loss": 0.4545, "lr": 1.4235984145925705e-05, "epoch": 13.55083857442348, "percentage": 67.75, "elapsed_time": "1:18:06", "remaining_time": "0:37:10", "throughput": 3598.64, "total_tokens": 16865256}
|
| 5185 |
+
{"current_steps": 25860, "total_steps": 38160, "loss": 0.456, "lr": 1.4225665125013224e-05, "epoch": 13.553459119496855, "percentage": 67.77, "elapsed_time": "1:18:07", "remaining_time": "0:37:09", "throughput": 3598.75, "total_tokens": 16868936}
|
| 5186 |
+
{"current_steps": 25865, "total_steps": 38160, "loss": 0.4788, "lr": 1.4215348357968669e-05, "epoch": 13.55607966457023, "percentage": 67.78, "elapsed_time": "1:18:08", "remaining_time": "0:37:08", "throughput": 3598.77, "total_tokens": 16872200}
|
| 5187 |
+
{"current_steps": 25870, "total_steps": 38160, "loss": 0.4408, "lr": 1.42050338469502e-05, "epoch": 13.558700209643606, "percentage": 67.79, "elapsed_time": "1:18:09", "remaining_time": "0:37:07", "throughput": 3599.11, "total_tokens": 16878824}
|
| 5188 |
+
{"current_steps": 25875, "total_steps": 38160, "loss": 0.4044, "lr": 1.4194721594115495e-05, "epoch": 13.56132075471698, "percentage": 67.81, "elapsed_time": "1:18:10", "remaining_time": "0:37:07", "throughput": 3599.24, "total_tokens": 16883080}
|
| 5189 |
+
{"current_steps": 25880, "total_steps": 38160, "loss": 0.5806, "lr": 1.4184411601621769e-05, "epoch": 13.563941299790356, "percentage": 67.82, "elapsed_time": "1:18:11", "remaining_time": "0:37:06", "throughput": 3599.44, "total_tokens": 16887336}
|
| 5190 |
+
{"current_steps": 25885, "total_steps": 38160, "loss": 0.5104, "lr": 1.4174103871625738e-05, "epoch": 13.566561844863731, "percentage": 67.83, "elapsed_time": "1:18:12", "remaining_time": "0:37:05", "throughput": 3599.53, "total_tokens": 16890760}
|
| 5191 |
+
{"current_steps": 25890, "total_steps": 38160, "loss": 0.4094, "lr": 1.4163798406283674e-05, "epoch": 13.569182389937106, "percentage": 67.85, "elapsed_time": "1:18:13", "remaining_time": "0:37:04", "throughput": 3599.52, "total_tokens": 16893928}
|
| 5192 |
+
{"current_steps": 25895, "total_steps": 38160, "loss": 0.3505, "lr": 1.415349520775135e-05, "epoch": 13.571802935010481, "percentage": 67.86, "elapsed_time": "1:18:14", "remaining_time": "0:37:03", "throughput": 3599.56, "total_tokens": 16897032}
|
| 5193 |
+
{"current_steps": 25900, "total_steps": 38160, "loss": 0.406, "lr": 1.4143194278184086e-05, "epoch": 13.574423480083858, "percentage": 67.87, "elapsed_time": "1:18:15", "remaining_time": "0:37:02", "throughput": 3599.63, "total_tokens": 16900648}
|
| 5194 |
+
{"current_steps": 25905, "total_steps": 38160, "loss": 0.6101, "lr": 1.4132895619736724e-05, "epoch": 13.577044025157234, "percentage": 67.89, "elapsed_time": "1:18:15", "remaining_time": "0:37:01", "throughput": 3599.65, "total_tokens": 16903880}
|
| 5195 |
+
{"current_steps": 25910, "total_steps": 38160, "loss": 0.5468, "lr": 1.4122599234563613e-05, "epoch": 13.579664570230609, "percentage": 67.9, "elapsed_time": "1:18:16", "remaining_time": "0:37:00", "throughput": 3599.78, "total_tokens": 16907816}
|
| 5196 |
+
{"current_steps": 25915, "total_steps": 38160, "loss": 0.4118, "lr": 1.4112305124818647e-05, "epoch": 13.582285115303984, "percentage": 67.91, "elapsed_time": "1:18:17", "remaining_time": "0:36:59", "throughput": 3599.78, "total_tokens": 16910664}
|
| 5197 |
+
{"current_steps": 25920, "total_steps": 38160, "loss": 0.5192, "lr": 1.4102013292655242e-05, "epoch": 13.584905660377359, "percentage": 67.92, "elapsed_time": "1:18:18", "remaining_time": "0:36:58", "throughput": 3599.91, "total_tokens": 16914408}
|
| 5198 |
+
{"current_steps": 25925, "total_steps": 38160, "loss": 0.619, "lr": 1.409172374022632e-05, "epoch": 13.587526205450734, "percentage": 67.94, "elapsed_time": "1:18:19", "remaining_time": "0:36:57", "throughput": 3599.94, "total_tokens": 16918120}
|
| 5199 |
+
{"current_steps": 25930, "total_steps": 38160, "loss": 0.5519, "lr": 1.4081436469684339e-05, "epoch": 13.59014675052411, "percentage": 67.95, "elapsed_time": "1:18:20", "remaining_time": "0:36:56", "throughput": 3599.88, "total_tokens": 16920680}
|
| 5200 |
+
{"current_steps": 25935, "total_steps": 38160, "loss": 0.4685, "lr": 1.4071151483181294e-05, "epoch": 13.592767295597485, "percentage": 67.96, "elapsed_time": "1:18:21", "remaining_time": "0:36:56", "throughput": 3599.84, "total_tokens": 16923592}
|
| 5201 |
+
{"current_steps": 25940, "total_steps": 38160, "loss": 0.4047, "lr": 1.4060868782868669e-05, "epoch": 13.59538784067086, "percentage": 67.98, "elapsed_time": "1:18:22", "remaining_time": "0:36:55", "throughput": 3599.83, "total_tokens": 16926920}
|
| 5202 |
+
{"current_steps": 25945, "total_steps": 38160, "loss": 0.5376, "lr": 1.405058837089749e-05, "epoch": 13.598008385744235, "percentage": 67.99, "elapsed_time": "1:18:23", "remaining_time": "0:36:54", "throughput": 3599.84, "total_tokens": 16930056}
|
| 5203 |
+
{"current_steps": 25950, "total_steps": 38160, "loss": 0.5225, "lr": 1.404031024941832e-05, "epoch": 13.60062893081761, "percentage": 68.0, "elapsed_time": "1:18:23", "remaining_time": "0:36:53", "throughput": 3599.68, "total_tokens": 16932424}
|
| 5204 |
+
{"current_steps": 25955, "total_steps": 38160, "loss": 0.4146, "lr": 1.40300344205812e-05, "epoch": 13.603249475890985, "percentage": 68.02, "elapsed_time": "1:18:24", "remaining_time": "0:36:52", "throughput": 3599.77, "total_tokens": 16936200}
|
| 5205 |
+
{"current_steps": 25960, "total_steps": 38160, "loss": 0.4696, "lr": 1.4019760886535738e-05, "epoch": 13.60587002096436, "percentage": 68.03, "elapsed_time": "1:18:25", "remaining_time": "0:36:51", "throughput": 3599.7, "total_tokens": 16939176}
|
| 5206 |
+
{"current_steps": 25965, "total_steps": 38160, "loss": 0.4477, "lr": 1.4009489649431018e-05, "epoch": 13.608490566037736, "percentage": 68.04, "elapsed_time": "1:18:26", "remaining_time": "0:36:50", "throughput": 3599.81, "total_tokens": 16943048}
|
| 5207 |
+
{"current_steps": 25970, "total_steps": 38160, "loss": 0.4522, "lr": 1.3999220711415675e-05, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "1:18:27", "remaining_time": "0:36:49", "throughput": 3599.8, "total_tokens": 16946056}
|
| 5208 |
+
{"current_steps": 25975, "total_steps": 38160, "loss": 0.5333, "lr": 1.3988954074637867e-05, "epoch": 13.613731656184486, "percentage": 68.07, "elapsed_time": "1:18:28", "remaining_time": "0:36:48", "throughput": 3599.7, "total_tokens": 16948712}
|
| 5209 |
+
{"current_steps": 25980, "total_steps": 38160, "loss": 0.5162, "lr": 1.3978689741245238e-05, "epoch": 13.616352201257861, "percentage": 68.08, "elapsed_time": "1:18:29", "remaining_time": "0:36:47", "throughput": 3599.76, "total_tokens": 16952168}
|
| 5210 |
+
{"current_steps": 25985, "total_steps": 38160, "loss": 0.4632, "lr": 1.3968427713384986e-05, "epoch": 13.618972746331236, "percentage": 68.09, "elapsed_time": "1:18:30", "remaining_time": "0:36:46", "throughput": 3599.78, "total_tokens": 16955496}
|
| 5211 |
+
{"current_steps": 25990, "total_steps": 38160, "loss": 0.5206, "lr": 1.3958167993203791e-05, "epoch": 13.621593291404611, "percentage": 68.11, "elapsed_time": "1:18:31", "remaining_time": "0:36:45", "throughput": 3599.74, "total_tokens": 16958472}
|
| 5212 |
+
{"current_steps": 25995, "total_steps": 38160, "loss": 0.4766, "lr": 1.3947910582847879e-05, "epoch": 13.624213836477988, "percentage": 68.12, "elapsed_time": "1:18:31", "remaining_time": "0:36:45", "throughput": 3599.78, "total_tokens": 16961832}
|
| 5213 |
+
{"current_steps": 26000, "total_steps": 38160, "loss": 0.3937, "lr": 1.3937655484462985e-05, "epoch": 13.626834381551364, "percentage": 68.13, "elapsed_time": "1:18:32", "remaining_time": "0:36:44", "throughput": 3599.71, "total_tokens": 16964552}
|
| 5214 |
+
{"current_steps": 26005, "total_steps": 38160, "loss": 0.4125, "lr": 1.392740270019437e-05, "epoch": 13.629454926624739, "percentage": 68.15, "elapsed_time": "1:18:33", "remaining_time": "0:36:43", "throughput": 3599.7, "total_tokens": 16967688}
|
| 5215 |
+
{"current_steps": 26010, "total_steps": 38160, "loss": 0.5031, "lr": 1.3917152232186776e-05, "epoch": 13.632075471698114, "percentage": 68.16, "elapsed_time": "1:18:34", "remaining_time": "0:36:42", "throughput": 3599.7, "total_tokens": 16971176}
|
| 5216 |
+
{"current_steps": 26015, "total_steps": 38160, "loss": 0.409, "lr": 1.3906904082584504e-05, "epoch": 13.634696016771489, "percentage": 68.17, "elapsed_time": "1:18:35", "remaining_time": "0:36:41", "throughput": 3599.75, "total_tokens": 16974568}
|
| 5217 |
+
{"current_steps": 26020, "total_steps": 38160, "loss": 0.5168, "lr": 1.3896658253531334e-05, "epoch": 13.637316561844864, "percentage": 68.19, "elapsed_time": "1:18:36", "remaining_time": "0:36:40", "throughput": 3599.94, "total_tokens": 16978728}
|
| 5218 |
+
{"current_steps": 26025, "total_steps": 38160, "loss": 0.3526, "lr": 1.3886414747170581e-05, "epoch": 13.63993710691824, "percentage": 68.2, "elapsed_time": "1:18:37", "remaining_time": "0:36:39", "throughput": 3599.88, "total_tokens": 16981928}
|
| 5219 |
+
{"current_steps": 26030, "total_steps": 38160, "loss": 0.4916, "lr": 1.3876173565645072e-05, "epoch": 13.642557651991615, "percentage": 68.21, "elapsed_time": "1:18:38", "remaining_time": "0:36:38", "throughput": 3599.73, "total_tokens": 16984104}
|
| 5220 |
+
{"current_steps": 26035, "total_steps": 38160, "loss": 0.4978, "lr": 1.3865934711097156e-05, "epoch": 13.64517819706499, "percentage": 68.23, "elapsed_time": "1:18:38", "remaining_time": "0:36:37", "throughput": 3599.71, "total_tokens": 16986952}
|
| 5221 |
+
{"current_steps": 26040, "total_steps": 38160, "loss": 0.3534, "lr": 1.3855698185668675e-05, "epoch": 13.647798742138365, "percentage": 68.24, "elapsed_time": "1:18:40", "remaining_time": "0:36:37", "throughput": 3599.91, "total_tokens": 16993160}
|
| 5222 |
+
{"current_steps": 26045, "total_steps": 38160, "loss": 0.5089, "lr": 1.3845463991500985e-05, "epoch": 13.65041928721174, "percentage": 68.25, "elapsed_time": "1:18:41", "remaining_time": "0:36:36", "throughput": 3599.77, "total_tokens": 16995560}
|
| 5223 |
+
{"current_steps": 26050, "total_steps": 38160, "loss": 0.5208, "lr": 1.3835232130734968e-05, "epoch": 13.653039832285115, "percentage": 68.27, "elapsed_time": "1:18:42", "remaining_time": "0:36:35", "throughput": 3599.84, "total_tokens": 16999016}
|
| 5224 |
+
{"current_steps": 26055, "total_steps": 38160, "loss": 0.378, "lr": 1.3825002605511017e-05, "epoch": 13.65566037735849, "percentage": 68.28, "elapsed_time": "1:18:42", "remaining_time": "0:36:34", "throughput": 3599.84, "total_tokens": 17001960}
|
| 5225 |
+
{"current_steps": 26060, "total_steps": 38160, "loss": 0.555, "lr": 1.3814775417969037e-05, "epoch": 13.658280922431866, "percentage": 68.29, "elapsed_time": "1:18:43", "remaining_time": "0:36:33", "throughput": 3599.87, "total_tokens": 17004936}
|
| 5226 |
+
{"current_steps": 26065, "total_steps": 38160, "loss": 0.3856, "lr": 1.3804550570248431e-05, "epoch": 13.66090146750524, "percentage": 68.3, "elapsed_time": "1:18:44", "remaining_time": "0:36:32", "throughput": 3600.03, "total_tokens": 17008584}
|
| 5227 |
+
{"current_steps": 26070, "total_steps": 38160, "loss": 0.4818, "lr": 1.3794328064488116e-05, "epoch": 13.663522012578616, "percentage": 68.32, "elapsed_time": "1:18:45", "remaining_time": "0:36:31", "throughput": 3599.97, "total_tokens": 17011048}
|
| 5228 |
+
{"current_steps": 26075, "total_steps": 38160, "loss": 0.398, "lr": 1.3784107902826523e-05, "epoch": 13.666142557651991, "percentage": 68.33, "elapsed_time": "1:18:46", "remaining_time": "0:36:30", "throughput": 3599.83, "total_tokens": 17013384}
|
| 5229 |
+
{"current_steps": 26080, "total_steps": 38160, "loss": 0.4687, "lr": 1.3773890087401603e-05, "epoch": 13.668763102725366, "percentage": 68.34, "elapsed_time": "1:18:47", "remaining_time": "0:36:29", "throughput": 3599.83, "total_tokens": 17016584}
|
| 5230 |
+
{"current_steps": 26085, "total_steps": 38160, "loss": 0.5032, "lr": 1.3763674620350802e-05, "epoch": 13.671383647798741, "percentage": 68.36, "elapsed_time": "1:18:47", "remaining_time": "0:36:28", "throughput": 3599.79, "total_tokens": 17019368}
|
| 5231 |
+
{"current_steps": 26090, "total_steps": 38160, "loss": 0.4095, "lr": 1.3753461503811089e-05, "epoch": 13.674004192872118, "percentage": 68.37, "elapsed_time": "1:18:48", "remaining_time": "0:36:27", "throughput": 3599.73, "total_tokens": 17022184}
|
| 5232 |
+
{"current_steps": 26095, "total_steps": 38160, "loss": 0.4467, "lr": 1.3743250739918922e-05, "epoch": 13.676624737945493, "percentage": 68.38, "elapsed_time": "1:18:49", "remaining_time": "0:36:26", "throughput": 3599.73, "total_tokens": 17025288}
|
| 5233 |
+
{"current_steps": 26100, "total_steps": 38160, "loss": 0.4191, "lr": 1.3733042330810265e-05, "epoch": 13.679245283018869, "percentage": 68.4, "elapsed_time": "1:18:50", "remaining_time": "0:36:25", "throughput": 3599.81, "total_tokens": 17028424}
|
| 5234 |
+
{"current_steps": 26105, "total_steps": 38160, "loss": 0.3566, "lr": 1.3722836278620616e-05, "epoch": 13.681865828092244, "percentage": 68.41, "elapsed_time": "1:18:51", "remaining_time": "0:36:24", "throughput": 3599.84, "total_tokens": 17031720}
|
| 5235 |
+
{"current_steps": 26110, "total_steps": 38160, "loss": 0.5311, "lr": 1.3712632585484957e-05, "epoch": 13.684486373165619, "percentage": 68.42, "elapsed_time": "1:18:52", "remaining_time": "0:36:23", "throughput": 3599.88, "total_tokens": 17034888}
|
| 5236 |
+
{"current_steps": 26115, "total_steps": 38160, "loss": 0.4944, "lr": 1.37024312535378e-05, "epoch": 13.687106918238994, "percentage": 68.44, "elapsed_time": "1:18:53", "remaining_time": "0:36:23", "throughput": 3600.05, "total_tokens": 17041576}
|
| 5237 |
+
{"current_steps": 26120, "total_steps": 38160, "loss": 0.4856, "lr": 1.3692232284913133e-05, "epoch": 13.68972746331237, "percentage": 68.45, "elapsed_time": "1:18:54", "remaining_time": "0:36:22", "throughput": 3599.97, "total_tokens": 17044424}
|
| 5238 |
+
{"current_steps": 26125, "total_steps": 38160, "loss": 0.4068, "lr": 1.3682035681744459e-05, "epoch": 13.692348008385745, "percentage": 68.46, "elapsed_time": "1:18:55", "remaining_time": "0:36:21", "throughput": 3600.05, "total_tokens": 17047976}
|
| 5239 |
+
{"current_steps": 26130, "total_steps": 38160, "loss": 0.6642, "lr": 1.3671841446164795e-05, "epoch": 13.69496855345912, "percentage": 68.47, "elapsed_time": "1:18:56", "remaining_time": "0:36:20", "throughput": 3600.07, "total_tokens": 17051208}
|
| 5240 |
+
{"current_steps": 26135, "total_steps": 38160, "loss": 0.4458, "lr": 1.3661649580306669e-05, "epoch": 13.697589098532495, "percentage": 68.49, "elapsed_time": "1:18:57", "remaining_time": "0:36:19", "throughput": 3599.97, "total_tokens": 17053640}
|
| 5241 |
+
{"current_steps": 26140, "total_steps": 38160, "loss": 0.4097, "lr": 1.3651460086302087e-05, "epoch": 13.70020964360587, "percentage": 68.5, "elapsed_time": "1:18:58", "remaining_time": "0:36:18", "throughput": 3599.99, "total_tokens": 17056968}
|
| 5242 |
+
{"current_steps": 26145, "total_steps": 38160, "loss": 0.5036, "lr": 1.3641272966282609e-05, "epoch": 13.702830188679245, "percentage": 68.51, "elapsed_time": "1:18:58", "remaining_time": "0:36:17", "throughput": 3599.98, "total_tokens": 17059688}
|
| 5243 |
+
{"current_steps": 26150, "total_steps": 38160, "loss": 0.507, "lr": 1.3631088222379218e-05, "epoch": 13.70545073375262, "percentage": 68.53, "elapsed_time": "1:18:59", "remaining_time": "0:36:16", "throughput": 3600.26, "total_tokens": 17064712}
|
| 5244 |
+
{"current_steps": 26155, "total_steps": 38160, "loss": 0.4059, "lr": 1.3620905856722468e-05, "epoch": 13.708071278825996, "percentage": 68.54, "elapsed_time": "1:19:00", "remaining_time": "0:36:15", "throughput": 3600.24, "total_tokens": 17067528}
|
| 5245 |
+
{"current_steps": 26160, "total_steps": 38160, "loss": 0.4084, "lr": 1.3610725871442392e-05, "epoch": 13.71069182389937, "percentage": 68.55, "elapsed_time": "1:19:01", "remaining_time": "0:36:15", "throughput": 3600.32, "total_tokens": 17070952}
|
| 5246 |
+
{"current_steps": 26165, "total_steps": 38160, "loss": 0.5867, "lr": 1.3600548268668532e-05, "epoch": 13.713312368972746, "percentage": 68.57, "elapsed_time": "1:19:02", "remaining_time": "0:36:14", "throughput": 3600.35, "total_tokens": 17073992}
|
| 5247 |
+
{"current_steps": 26170, "total_steps": 38160, "loss": 0.4576, "lr": 1.3590373050529927e-05, "epoch": 13.715932914046121, "percentage": 68.58, "elapsed_time": "1:19:03", "remaining_time": "0:36:13", "throughput": 3600.58, "total_tokens": 17078376}
|
| 5248 |
+
{"current_steps": 26175, "total_steps": 38160, "loss": 0.516, "lr": 1.3580200219155116e-05, "epoch": 13.718553459119496, "percentage": 68.59, "elapsed_time": "1:19:04", "remaining_time": "0:36:12", "throughput": 3600.76, "total_tokens": 17082952}
|
| 5249 |
+
{"current_steps": 26180, "total_steps": 38160, "loss": 0.5023, "lr": 1.3570029776672122e-05, "epoch": 13.721174004192871, "percentage": 68.61, "elapsed_time": "1:19:05", "remaining_time": "0:36:11", "throughput": 3600.71, "total_tokens": 17085640}
|
| 5250 |
+
{"current_steps": 26185, "total_steps": 38160, "loss": 0.3613, "lr": 1.35598617252085e-05, "epoch": 13.723794549266248, "percentage": 68.62, "elapsed_time": "1:19:05", "remaining_time": "0:36:10", "throughput": 3600.82, "total_tokens": 17089096}
|
| 5251 |
+
{"current_steps": 26190, "total_steps": 38160, "loss": 0.6529, "lr": 1.354969606689129e-05, "epoch": 13.726415094339622, "percentage": 68.63, "elapsed_time": "1:19:06", "remaining_time": "0:36:09", "throughput": 3600.78, "total_tokens": 17091752}
|
| 5252 |
+
{"current_steps": 26195, "total_steps": 38160, "loss": 0.5061, "lr": 1.353953280384704e-05, "epoch": 13.729035639412999, "percentage": 68.65, "elapsed_time": "1:19:07", "remaining_time": "0:36:08", "throughput": 3600.85, "total_tokens": 17094824}
|
| 5253 |
+
{"current_steps": 26200, "total_steps": 38160, "loss": 0.6367, "lr": 1.352937193820177e-05, "epoch": 13.731656184486374, "percentage": 68.66, "elapsed_time": "1:19:08", "remaining_time": "0:36:07", "throughput": 3600.89, "total_tokens": 17097992}
|
| 5254 |
+
{"current_steps": 26205, "total_steps": 38160, "loss": 0.5818, "lr": 1.3519213472081038e-05, "epoch": 13.734276729559749, "percentage": 68.67, "elapsed_time": "1:19:09", "remaining_time": "0:36:06", "throughput": 3600.87, "total_tokens": 17100968}
|
| 5255 |
+
{"current_steps": 26210, "total_steps": 38160, "loss": 0.5117, "lr": 1.3509057407609854e-05, "epoch": 13.736897274633124, "percentage": 68.68, "elapsed_time": "1:19:09", "remaining_time": "0:36:05", "throughput": 3600.77, "total_tokens": 17103496}
|
| 5256 |
+
{"current_steps": 26215, "total_steps": 38160, "loss": 0.4495, "lr": 1.3498903746912769e-05, "epoch": 13.7395178197065, "percentage": 68.7, "elapsed_time": "1:19:10", "remaining_time": "0:36:04", "throughput": 3600.88, "total_tokens": 17106920}
|
| 5257 |
+
{"current_steps": 26220, "total_steps": 38160, "loss": 0.4216, "lr": 1.3488752492113807e-05, "epoch": 13.742138364779874, "percentage": 68.71, "elapsed_time": "1:19:11", "remaining_time": "0:36:03", "throughput": 3600.87, "total_tokens": 17110472}
|
| 5258 |
+
{"current_steps": 26225, "total_steps": 38160, "loss": 0.5346, "lr": 1.3478603645336508e-05, "epoch": 13.74475890985325, "percentage": 68.72, "elapsed_time": "1:19:12", "remaining_time": "0:36:02", "throughput": 3600.92, "total_tokens": 17113640}
|
| 5259 |
+
{"current_steps": 26230, "total_steps": 38160, "loss": 0.6037, "lr": 1.3468457208703877e-05, "epoch": 13.747379454926625, "percentage": 68.74, "elapsed_time": "1:19:13", "remaining_time": "0:36:01", "throughput": 3600.96, "total_tokens": 17116808}
|
| 5260 |
+
{"current_steps": 26235, "total_steps": 38160, "loss": 0.5087, "lr": 1.3458313184338437e-05, "epoch": 13.75, "percentage": 68.75, "elapsed_time": "1:19:14", "remaining_time": "0:36:01", "throughput": 3600.82, "total_tokens": 17119144}
|
| 5261 |
+
{"current_steps": 26240, "total_steps": 38160, "loss": 0.4196, "lr": 1.3448171574362201e-05, "epoch": 13.752620545073375, "percentage": 68.76, "elapsed_time": "1:19:15", "remaining_time": "0:36:00", "throughput": 3600.81, "total_tokens": 17122056}
|
| 5262 |
+
{"current_steps": 26245, "total_steps": 38160, "loss": 0.4029, "lr": 1.3438032380896681e-05, "epoch": 13.75524109014675, "percentage": 68.78, "elapsed_time": "1:19:15", "remaining_time": "0:35:59", "throughput": 3600.86, "total_tokens": 17125352}
|
| 5263 |
+
{"current_steps": 26250, "total_steps": 38160, "loss": 0.444, "lr": 1.342789560606289e-05, "epoch": 13.757861635220126, "percentage": 68.79, "elapsed_time": "1:19:16", "remaining_time": "0:35:58", "throughput": 3600.86, "total_tokens": 17128456}
|
| 5264 |
+
{"current_steps": 26255, "total_steps": 38160, "loss": 0.3552, "lr": 1.341776125198131e-05, "epoch": 13.7604821802935, "percentage": 68.8, "elapsed_time": "1:19:17", "remaining_time": "0:35:57", "throughput": 3600.87, "total_tokens": 17131496}
|
| 5265 |
+
{"current_steps": 26260, "total_steps": 38160, "loss": 0.3498, "lr": 1.3407629320771948e-05, "epoch": 13.763102725366876, "percentage": 68.82, "elapsed_time": "1:19:18", "remaining_time": "0:35:56", "throughput": 3601.06, "total_tokens": 17135528}
|
| 5266 |
+
{"current_steps": 26265, "total_steps": 38160, "loss": 0.4792, "lr": 1.3397499814554266e-05, "epoch": 13.765723270440251, "percentage": 68.83, "elapsed_time": "1:19:19", "remaining_time": "0:35:55", "throughput": 3601.15, "total_tokens": 17138728}
|
| 5267 |
+
{"current_steps": 26270, "total_steps": 38160, "loss": 0.4894, "lr": 1.3387372735447256e-05, "epoch": 13.768343815513626, "percentage": 68.84, "elapsed_time": "1:19:20", "remaining_time": "0:35:54", "throughput": 3601.28, "total_tokens": 17142600}
|
| 5268 |
+
{"current_steps": 26275, "total_steps": 38160, "loss": 0.5423, "lr": 1.3377248085569399e-05, "epoch": 13.770964360587001, "percentage": 68.85, "elapsed_time": "1:19:20", "remaining_time": "0:35:53", "throughput": 3601.25, "total_tokens": 17145448}
|
| 5269 |
+
{"current_steps": 26280, "total_steps": 38160, "loss": 0.3743, "lr": 1.336712586703863e-05, "epoch": 13.773584905660378, "percentage": 68.87, "elapsed_time": "1:19:21", "remaining_time": "0:35:52", "throughput": 3601.35, "total_tokens": 17148712}
|
| 5270 |
+
{"current_steps": 26285, "total_steps": 38160, "loss": 0.4234, "lr": 1.3357006081972414e-05, "epoch": 13.776205450733752, "percentage": 68.88, "elapsed_time": "1:19:22", "remaining_time": "0:35:51", "throughput": 3601.27, "total_tokens": 17151144}
|
| 5271 |
+
{"current_steps": 26290, "total_steps": 38160, "loss": 0.4038, "lr": 1.3346888732487706e-05, "epoch": 13.778825995807129, "percentage": 68.89, "elapsed_time": "1:19:23", "remaining_time": "0:35:50", "throughput": 3601.23, "total_tokens": 17153864}
|
| 5272 |
+
{"current_steps": 26295, "total_steps": 38160, "loss": 0.4918, "lr": 1.3336773820700921e-05, "epoch": 13.781446540880504, "percentage": 68.91, "elapsed_time": "1:19:24", "remaining_time": "0:35:49", "throughput": 3601.25, "total_tokens": 17156968}
|
| 5273 |
+
{"current_steps": 26300, "total_steps": 38160, "loss": 0.5447, "lr": 1.3326661348727992e-05, "epoch": 13.784067085953879, "percentage": 68.92, "elapsed_time": "1:19:25", "remaining_time": "0:35:48", "throughput": 3601.3, "total_tokens": 17160520}
|
| 5274 |
+
{"current_steps": 26305, "total_steps": 38160, "loss": 0.4183, "lr": 1.3316551318684342e-05, "epoch": 13.786687631027254, "percentage": 68.93, "elapsed_time": "1:19:25", "remaining_time": "0:35:47", "throughput": 3601.28, "total_tokens": 17163368}
|
| 5275 |
+
{"current_steps": 26310, "total_steps": 38160, "loss": 0.6572, "lr": 1.3306443732684854e-05, "epoch": 13.78930817610063, "percentage": 68.95, "elapsed_time": "1:19:26", "remaining_time": "0:35:46", "throughput": 3601.25, "total_tokens": 17166472}
|
| 5276 |
+
{"current_steps": 26315, "total_steps": 38160, "loss": 0.5749, "lr": 1.3296338592843929e-05, "epoch": 13.791928721174004, "percentage": 68.96, "elapsed_time": "1:19:27", "remaining_time": "0:35:46", "throughput": 3601.34, "total_tokens": 17170088}
|
| 5277 |
+
{"current_steps": 26320, "total_steps": 38160, "loss": 0.4618, "lr": 1.3286235901275457e-05, "epoch": 13.79454926624738, "percentage": 68.97, "elapsed_time": "1:19:28", "remaining_time": "0:35:45", "throughput": 3601.24, "total_tokens": 17172840}
|
| 5278 |
+
{"current_steps": 26325, "total_steps": 38160, "loss": 0.4207, "lr": 1.327613566009279e-05, "epoch": 13.797169811320755, "percentage": 68.99, "elapsed_time": "1:19:29", "remaining_time": "0:35:44", "throughput": 3601.2, "total_tokens": 17175752}
|
| 5279 |
+
{"current_steps": 26330, "total_steps": 38160, "loss": 0.5344, "lr": 1.3266037871408798e-05, "epoch": 13.79979035639413, "percentage": 69.0, "elapsed_time": "1:19:30", "remaining_time": "0:35:43", "throughput": 3601.16, "total_tokens": 17178504}
|
| 5280 |
+
{"current_steps": 26335, "total_steps": 38160, "loss": 0.525, "lr": 1.3255942537335805e-05, "epoch": 13.802410901467505, "percentage": 69.01, "elapsed_time": "1:19:31", "remaining_time": "0:35:42", "throughput": 3600.92, "total_tokens": 17180648}
|
| 5281 |
+
{"current_steps": 26340, "total_steps": 38160, "loss": 0.5701, "lr": 1.3245849659985652e-05, "epoch": 13.80503144654088, "percentage": 69.03, "elapsed_time": "1:19:32", "remaining_time": "0:35:41", "throughput": 3601.02, "total_tokens": 17184648}
|
| 5282 |
+
{"current_steps": 26345, "total_steps": 38160, "loss": 0.6097, "lr": 1.3235759241469658e-05, "epoch": 13.807651991614255, "percentage": 69.04, "elapsed_time": "1:19:33", "remaining_time": "0:35:40", "throughput": 3601.06, "total_tokens": 17188264}
|
| 5283 |
+
{"current_steps": 26350, "total_steps": 38160, "loss": 0.3766, "lr": 1.322567128389861e-05, "epoch": 13.81027253668763, "percentage": 69.05, "elapsed_time": "1:19:33", "remaining_time": "0:35:39", "throughput": 3601.08, "total_tokens": 17191464}
|
| 5284 |
+
{"current_steps": 26355, "total_steps": 38160, "loss": 0.4515, "lr": 1.3215585789382812e-05, "epoch": 13.812893081761006, "percentage": 69.06, "elapsed_time": "1:19:34", "remaining_time": "0:35:38", "throughput": 3601.17, "total_tokens": 17194792}
|
| 5285 |
+
{"current_steps": 26360, "total_steps": 38160, "loss": 0.4643, "lr": 1.320550276003201e-05, "epoch": 13.815513626834381, "percentage": 69.08, "elapsed_time": "1:19:35", "remaining_time": "0:35:37", "throughput": 3601.21, "total_tokens": 17197864}
|
| 5286 |
+
{"current_steps": 26365, "total_steps": 38160, "loss": 0.5252, "lr": 1.3195422197955473e-05, "epoch": 13.818134171907756, "percentage": 69.09, "elapsed_time": "1:19:36", "remaining_time": "0:35:36", "throughput": 3601.21, "total_tokens": 17200840}
|
| 5287 |
+
{"current_steps": 26370, "total_steps": 38160, "loss": 0.4202, "lr": 1.3185344105261937e-05, "epoch": 13.820754716981131, "percentage": 69.1, "elapsed_time": "1:19:37", "remaining_time": "0:35:35", "throughput": 3601.27, "total_tokens": 17204296}
|
| 5288 |
+
{"current_steps": 26375, "total_steps": 38160, "loss": 0.4804, "lr": 1.3175268484059639e-05, "epoch": 13.823375262054507, "percentage": 69.12, "elapsed_time": "1:19:38", "remaining_time": "0:35:34", "throughput": 3601.23, "total_tokens": 17206920}
|
| 5289 |
+
{"current_steps": 26380, "total_steps": 38160, "loss": 0.5409, "lr": 1.316519533645626e-05, "epoch": 13.825995807127882, "percentage": 69.13, "elapsed_time": "1:19:38", "remaining_time": "0:35:34", "throughput": 3601.16, "total_tokens": 17209672}
|
| 5290 |
+
{"current_steps": 26385, "total_steps": 38160, "loss": 0.6596, "lr": 1.3155124664559004e-05, "epoch": 13.828616352201259, "percentage": 69.14, "elapsed_time": "1:19:39", "remaining_time": "0:35:33", "throughput": 3601.04, "total_tokens": 17212296}
|
| 5291 |
+
{"current_steps": 26390, "total_steps": 38160, "loss": 0.591, "lr": 1.3145056470474525e-05, "epoch": 13.831236897274634, "percentage": 69.16, "elapsed_time": "1:19:40", "remaining_time": "0:35:32", "throughput": 3601.07, "total_tokens": 17215880}
|
| 5292 |
+
{"current_steps": 26395, "total_steps": 38160, "loss": 0.5335, "lr": 1.313499075630899e-05, "epoch": 13.833857442348009, "percentage": 69.17, "elapsed_time": "1:19:41", "remaining_time": "0:35:31", "throughput": 3601.03, "total_tokens": 17218760}
|
| 5293 |
+
{"current_steps": 26400, "total_steps": 38160, "loss": 0.6174, "lr": 1.312492752416802e-05, "epoch": 13.836477987421384, "percentage": 69.18, "elapsed_time": "1:19:42", "remaining_time": "0:35:30", "throughput": 3601.12, "total_tokens": 17222408}
|
| 5294 |
+
{"current_steps": 26405, "total_steps": 38160, "loss": 0.562, "lr": 1.3114866776156748e-05, "epoch": 13.83909853249476, "percentage": 69.2, "elapsed_time": "1:19:43", "remaining_time": "0:35:29", "throughput": 3601.21, "total_tokens": 17225800}
|
| 5295 |
+
{"current_steps": 26410, "total_steps": 38160, "loss": 0.3757, "lr": 1.3104808514379748e-05, "epoch": 13.841719077568134, "percentage": 69.21, "elapsed_time": "1:19:44", "remaining_time": "0:35:28", "throughput": 3601.32, "total_tokens": 17229256}
|
| 5296 |
+
{"current_steps": 26415, "total_steps": 38160, "loss": 0.6039, "lr": 1.3094752740941094e-05, "epoch": 13.84433962264151, "percentage": 69.22, "elapsed_time": "1:19:44", "remaining_time": "0:35:27", "throughput": 3601.29, "total_tokens": 17232104}
|
| 5297 |
+
{"current_steps": 26420, "total_steps": 38160, "loss": 0.7109, "lr": 1.3084699457944344e-05, "epoch": 13.846960167714885, "percentage": 69.23, "elapsed_time": "1:19:45", "remaining_time": "0:35:26", "throughput": 3601.32, "total_tokens": 17235528}
|
| 5298 |
+
{"current_steps": 26425, "total_steps": 38160, "loss": 0.4366, "lr": 1.3074648667492528e-05, "epoch": 13.84958071278826, "percentage": 69.25, "elapsed_time": "1:19:46", "remaining_time": "0:35:25", "throughput": 3601.23, "total_tokens": 17238088}
|
| 5299 |
+
{"current_steps": 26430, "total_steps": 38160, "loss": 0.4968, "lr": 1.3064600371688171e-05, "epoch": 13.852201257861635, "percentage": 69.26, "elapsed_time": "1:19:47", "remaining_time": "0:35:24", "throughput": 3601.3, "total_tokens": 17241288}
|
| 5300 |
+
{"current_steps": 26435, "total_steps": 38160, "loss": 0.4146, "lr": 1.3054554572633249e-05, "epoch": 13.85482180293501, "percentage": 69.27, "elapsed_time": "1:19:48", "remaining_time": "0:35:23", "throughput": 3601.43, "total_tokens": 17244840}
|
| 5301 |
+
{"current_steps": 26440, "total_steps": 38160, "loss": 0.5056, "lr": 1.3044511272429222e-05, "epoch": 13.857442348008385, "percentage": 69.29, "elapsed_time": "1:19:49", "remaining_time": "0:35:22", "throughput": 3601.43, "total_tokens": 17247688}
|
| 5302 |
+
{"current_steps": 26445, "total_steps": 38160, "loss": 0.5561, "lr": 1.3034470473177038e-05, "epoch": 13.86006289308176, "percentage": 69.3, "elapsed_time": "1:19:50", "remaining_time": "0:35:22", "throughput": 3601.59, "total_tokens": 17252360}
|
| 5303 |
+
{"current_steps": 26450, "total_steps": 38160, "loss": 0.5691, "lr": 1.3024432176977125e-05, "epoch": 13.862683438155136, "percentage": 69.31, "elapsed_time": "1:19:51", "remaining_time": "0:35:21", "throughput": 3601.58, "total_tokens": 17255368}
|
| 5304 |
+
{"current_steps": 26455, "total_steps": 38160, "loss": 0.5737, "lr": 1.3014396385929372e-05, "epoch": 13.865303983228511, "percentage": 69.33, "elapsed_time": "1:19:51", "remaining_time": "0:35:20", "throughput": 3601.44, "total_tokens": 17257832}
|
| 5305 |
+
{"current_steps": 26460, "total_steps": 38160, "loss": 0.3621, "lr": 1.3004363102133165e-05, "epoch": 13.867924528301886, "percentage": 69.34, "elapsed_time": "1:19:52", "remaining_time": "0:35:19", "throughput": 3601.45, "total_tokens": 17261480}
|
| 5306 |
+
{"current_steps": 26465, "total_steps": 38160, "loss": 0.5352, "lr": 1.2994332327687348e-05, "epoch": 13.870545073375261, "percentage": 69.35, "elapsed_time": "1:19:53", "remaining_time": "0:35:18", "throughput": 3601.44, "total_tokens": 17264360}
|
| 5307 |
+
{"current_steps": 26470, "total_steps": 38160, "loss": 0.4513, "lr": 1.2984304064690228e-05, "epoch": 13.873165618448636, "percentage": 69.37, "elapsed_time": "1:19:54", "remaining_time": "0:35:17", "throughput": 3601.51, "total_tokens": 17268104}
|
| 5308 |
+
{"current_steps": 26475, "total_steps": 38160, "loss": 0.4413, "lr": 1.2974278315239619e-05, "epoch": 13.875786163522012, "percentage": 69.38, "elapsed_time": "1:19:55", "remaining_time": "0:35:16", "throughput": 3601.53, "total_tokens": 17271048}
|
| 5309 |
+
{"current_steps": 26480, "total_steps": 38160, "loss": 0.4621, "lr": 1.2964255081432786e-05, "epoch": 13.878406708595389, "percentage": 69.39, "elapsed_time": "1:19:56", "remaining_time": "0:35:15", "throughput": 3601.52, "total_tokens": 17274056}
|
| 5310 |
+
{"current_steps": 26485, "total_steps": 38160, "loss": 0.4734, "lr": 1.2954234365366488e-05, "epoch": 13.881027253668764, "percentage": 69.41, "elapsed_time": "1:19:57", "remaining_time": "0:35:14", "throughput": 3601.51, "total_tokens": 17277192}
|
| 5311 |
+
{"current_steps": 26490, "total_steps": 38160, "loss": 0.4629, "lr": 1.2944216169136936e-05, "epoch": 13.883647798742139, "percentage": 69.42, "elapsed_time": "1:19:58", "remaining_time": "0:35:13", "throughput": 3601.63, "total_tokens": 17281128}
|
| 5312 |
+
{"current_steps": 26495, "total_steps": 38160, "loss": 0.4118, "lr": 1.2934200494839815e-05, "epoch": 13.886268343815514, "percentage": 69.43, "elapsed_time": "1:19:58", "remaining_time": "0:35:12", "throughput": 3601.58, "total_tokens": 17283848}
|
| 5313 |
+
{"current_steps": 26500, "total_steps": 38160, "loss": 0.4021, "lr": 1.2924187344570293e-05, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "1:19:59", "remaining_time": "0:35:11", "throughput": 3601.6, "total_tokens": 17287208}
|
| 5314 |
+
{"current_steps": 26505, "total_steps": 38160, "loss": 0.4166, "lr": 1.2914176720423011e-05, "epoch": 13.891509433962264, "percentage": 69.46, "elapsed_time": "1:20:00", "remaining_time": "0:35:11", "throughput": 3601.7, "total_tokens": 17290696}
|
| 5315 |
+
{"current_steps": 26510, "total_steps": 38160, "loss": 0.4984, "lr": 1.2904168624492088e-05, "epoch": 13.89412997903564, "percentage": 69.47, "elapsed_time": "1:20:01", "remaining_time": "0:35:10", "throughput": 3601.81, "total_tokens": 17294600}
|
| 5316 |
+
{"current_steps": 26515, "total_steps": 38160, "loss": 0.5056, "lr": 1.2894163058871092e-05, "epoch": 13.896750524109015, "percentage": 69.48, "elapsed_time": "1:20:02", "remaining_time": "0:35:09", "throughput": 3601.84, "total_tokens": 17297768}
|
| 5317 |
+
{"current_steps": 26520, "total_steps": 38160, "loss": 0.4652, "lr": 1.2884160025653064e-05, "epoch": 13.89937106918239, "percentage": 69.5, "elapsed_time": "1:20:03", "remaining_time": "0:35:08", "throughput": 3601.8, "total_tokens": 17300808}
|
| 5318 |
+
{"current_steps": 26525, "total_steps": 38160, "loss": 0.5123, "lr": 1.2874159526930532e-05, "epoch": 13.901991614255765, "percentage": 69.51, "elapsed_time": "1:20:04", "remaining_time": "0:35:07", "throughput": 3601.75, "total_tokens": 17303432}
|
| 5319 |
+
{"current_steps": 26530, "total_steps": 38160, "loss": 0.4676, "lr": 1.286416156479549e-05, "epoch": 13.90461215932914, "percentage": 69.52, "elapsed_time": "1:20:04", "remaining_time": "0:35:06", "throughput": 3601.71, "total_tokens": 17306152}
|
| 5320 |
+
{"current_steps": 26535, "total_steps": 38160, "loss": 0.4557, "lr": 1.2854166141339394e-05, "epoch": 13.907232704402515, "percentage": 69.54, "elapsed_time": "1:20:05", "remaining_time": "0:35:05", "throughput": 3601.64, "total_tokens": 17308840}
|
| 5321 |
+
{"current_steps": 26540, "total_steps": 38160, "loss": 0.5716, "lr": 1.2844173258653183e-05, "epoch": 13.90985324947589, "percentage": 69.55, "elapsed_time": "1:20:06", "remaining_time": "0:35:04", "throughput": 3601.68, "total_tokens": 17311944}
|
| 5322 |
+
{"current_steps": 26545, "total_steps": 38160, "loss": 0.4691, "lr": 1.2834182918827247e-05, "epoch": 13.912473794549266, "percentage": 69.56, "elapsed_time": "1:20:07", "remaining_time": "0:35:03", "throughput": 3601.7, "total_tokens": 17314888}
|
| 5323 |
+
{"current_steps": 26550, "total_steps": 38160, "loss": 0.5626, "lr": 1.282419512395144e-05, "epoch": 13.915094339622641, "percentage": 69.58, "elapsed_time": "1:20:08", "remaining_time": "0:35:02", "throughput": 3601.76, "total_tokens": 17318248}
|
| 5324 |
+
{"current_steps": 26555, "total_steps": 38160, "loss": 0.4683, "lr": 1.2814209876115102e-05, "epoch": 13.917714884696016, "percentage": 69.59, "elapsed_time": "1:20:09", "remaining_time": "0:35:01", "throughput": 3601.77, "total_tokens": 17321320}
|
| 5325 |
+
{"current_steps": 26560, "total_steps": 38160, "loss": 0.4449, "lr": 1.2804227177407038e-05, "epoch": 13.920335429769391, "percentage": 69.6, "elapsed_time": "1:20:10", "remaining_time": "0:35:00", "throughput": 3601.79, "total_tokens": 17324680}
|
| 5326 |
+
{"current_steps": 26565, "total_steps": 38160, "loss": 0.4537, "lr": 1.279424702991552e-05, "epoch": 13.922955974842766, "percentage": 69.61, "elapsed_time": "1:20:10", "remaining_time": "0:34:59", "throughput": 3601.86, "total_tokens": 17328072}
|
| 5327 |
+
{"current_steps": 26570, "total_steps": 38160, "loss": 0.5137, "lr": 1.2784269435728264e-05, "epoch": 13.925576519916142, "percentage": 69.63, "elapsed_time": "1:20:11", "remaining_time": "0:34:58", "throughput": 3601.87, "total_tokens": 17331272}
|
| 5328 |
+
{"current_steps": 26575, "total_steps": 38160, "loss": 0.454, "lr": 1.2774294396932488e-05, "epoch": 13.928197064989519, "percentage": 69.64, "elapsed_time": "1:20:12", "remaining_time": "0:34:57", "throughput": 3601.97, "total_tokens": 17334920}
|
| 5329 |
+
{"current_steps": 26580, "total_steps": 38160, "loss": 0.5299, "lr": 1.2764321915614835e-05, "epoch": 13.930817610062894, "percentage": 69.65, "elapsed_time": "1:20:13", "remaining_time": "0:34:57", "throughput": 3602.07, "total_tokens": 17338664}
|
| 5330 |
+
{"current_steps": 26585, "total_steps": 38160, "loss": 0.5859, "lr": 1.2754351993861444e-05, "epoch": 13.933438155136269, "percentage": 69.67, "elapsed_time": "1:20:14", "remaining_time": "0:34:56", "throughput": 3601.97, "total_tokens": 17341000}
|
| 5331 |
+
{"current_steps": 26590, "total_steps": 38160, "loss": 0.4117, "lr": 1.2744384633757922e-05, "epoch": 13.936058700209644, "percentage": 69.68, "elapsed_time": "1:20:15", "remaining_time": "0:34:55", "throughput": 3602.02, "total_tokens": 17344456}
|
| 5332 |
+
{"current_steps": 26595, "total_steps": 38160, "loss": 0.6023, "lr": 1.2734419837389306e-05, "epoch": 13.93867924528302, "percentage": 69.69, "elapsed_time": "1:20:16", "remaining_time": "0:34:54", "throughput": 3602.06, "total_tokens": 17347976}
|
| 5333 |
+
{"current_steps": 26600, "total_steps": 38160, "loss": 0.5093, "lr": 1.2724457606840123e-05, "epoch": 13.941299790356394, "percentage": 69.71, "elapsed_time": "1:20:16", "remaining_time": "0:34:53", "throughput": 3602.13, "total_tokens": 17351336}
|
| 5334 |
+
{"current_steps": 26605, "total_steps": 38160, "loss": 0.4363, "lr": 1.2714497944194376e-05, "epoch": 13.94392033542977, "percentage": 69.72, "elapsed_time": "1:20:18", "remaining_time": "0:34:52", "throughput": 3602.4, "total_tokens": 17356744}
|
| 5335 |
+
{"current_steps": 26610, "total_steps": 38160, "loss": 0.5023, "lr": 1.2704540851535484e-05, "epoch": 13.946540880503145, "percentage": 69.73, "elapsed_time": "1:20:18", "remaining_time": "0:34:51", "throughput": 3602.3, "total_tokens": 17359240}
|
| 5336 |
+
{"current_steps": 26615, "total_steps": 38160, "loss": 0.4783, "lr": 1.2694586330946376e-05, "epoch": 13.94916142557652, "percentage": 69.75, "elapsed_time": "1:20:19", "remaining_time": "0:34:50", "throughput": 3602.44, "total_tokens": 17363592}
|
| 5337 |
+
{"current_steps": 26620, "total_steps": 38160, "loss": 0.7031, "lr": 1.2684634384509427e-05, "epoch": 13.951781970649895, "percentage": 69.76, "elapsed_time": "1:20:20", "remaining_time": "0:34:49", "throughput": 3602.34, "total_tokens": 17365992}
|
| 5338 |
+
{"current_steps": 26625, "total_steps": 38160, "loss": 0.4869, "lr": 1.2674685014306456e-05, "epoch": 13.95440251572327, "percentage": 69.77, "elapsed_time": "1:20:21", "remaining_time": "0:34:48", "throughput": 3602.21, "total_tokens": 17368520}
|
| 5339 |
+
{"current_steps": 26630, "total_steps": 38160, "loss": 0.4528, "lr": 1.2664738222418776e-05, "epoch": 13.957023060796645, "percentage": 69.79, "elapsed_time": "1:20:22", "remaining_time": "0:34:47", "throughput": 3602.28, "total_tokens": 17371880}
|
| 5340 |
+
{"current_steps": 26635, "total_steps": 38160, "loss": 0.414, "lr": 1.2654794010927118e-05, "epoch": 13.95964360587002, "percentage": 69.8, "elapsed_time": "1:20:23", "remaining_time": "0:34:47", "throughput": 3602.26, "total_tokens": 17374600}
|
| 5341 |
+
{"current_steps": 26640, "total_steps": 38160, "loss": 0.3808, "lr": 1.2644852381911715e-05, "epoch": 13.962264150943396, "percentage": 69.81, "elapsed_time": "1:20:24", "remaining_time": "0:34:46", "throughput": 3602.38, "total_tokens": 17378600}
|
| 5342 |
+
{"current_steps": 26645, "total_steps": 38160, "loss": 0.449, "lr": 1.2634913337452248e-05, "epoch": 13.964884696016771, "percentage": 69.82, "elapsed_time": "1:20:25", "remaining_time": "0:34:45", "throughput": 3602.36, "total_tokens": 17381416}
|
| 5343 |
+
{"current_steps": 26650, "total_steps": 38160, "loss": 0.4809, "lr": 1.2624976879627832e-05, "epoch": 13.967505241090146, "percentage": 69.84, "elapsed_time": "1:20:25", "remaining_time": "0:34:44", "throughput": 3602.32, "total_tokens": 17384264}
|
| 5344 |
+
{"current_steps": 26655, "total_steps": 38160, "loss": 0.5684, "lr": 1.2615043010517069e-05, "epoch": 13.970125786163521, "percentage": 69.85, "elapsed_time": "1:20:26", "remaining_time": "0:34:43", "throughput": 3602.42, "total_tokens": 17387912}
|
| 5345 |
+
{"current_steps": 26660, "total_steps": 38160, "loss": 0.3811, "lr": 1.2605111732198027e-05, "epoch": 13.972746331236896, "percentage": 69.86, "elapsed_time": "1:20:27", "remaining_time": "0:34:42", "throughput": 3602.38, "total_tokens": 17390600}
|
| 5346 |
+
{"current_steps": 26665, "total_steps": 38160, "loss": 0.4487, "lr": 1.2595183046748188e-05, "epoch": 13.975366876310272, "percentage": 69.88, "elapsed_time": "1:20:28", "remaining_time": "0:34:41", "throughput": 3602.41, "total_tokens": 17393704}
|
| 5347 |
+
{"current_steps": 26670, "total_steps": 38160, "loss": 0.545, "lr": 1.2585256956244545e-05, "epoch": 13.977987421383649, "percentage": 69.89, "elapsed_time": "1:20:29", "remaining_time": "0:34:40", "throughput": 3602.41, "total_tokens": 17396808}
|
| 5348 |
+
{"current_steps": 26675, "total_steps": 38160, "loss": 0.5163, "lr": 1.2575333462763506e-05, "epoch": 13.980607966457024, "percentage": 69.9, "elapsed_time": "1:20:30", "remaining_time": "0:34:39", "throughput": 3602.45, "total_tokens": 17400040}
|
| 5349 |
+
{"current_steps": 26680, "total_steps": 38160, "loss": 0.3618, "lr": 1.2565412568380957e-05, "epoch": 13.983228511530399, "percentage": 69.92, "elapsed_time": "1:20:30", "remaining_time": "0:34:38", "throughput": 3602.53, "total_tokens": 17403624}
|
| 5350 |
+
{"current_steps": 26685, "total_steps": 38160, "loss": 0.6973, "lr": 1.2555494275172237e-05, "epoch": 13.985849056603774, "percentage": 69.93, "elapsed_time": "1:20:31", "remaining_time": "0:34:37", "throughput": 3602.55, "total_tokens": 17406760}
|
| 5351 |
+
{"current_steps": 26690, "total_steps": 38160, "loss": 0.4826, "lr": 1.2545578585212148e-05, "epoch": 13.98846960167715, "percentage": 69.94, "elapsed_time": "1:20:32", "remaining_time": "0:34:36", "throughput": 3602.58, "total_tokens": 17409832}
|
| 5352 |
+
{"current_steps": 26695, "total_steps": 38160, "loss": 0.4689, "lr": 1.2535665500574922e-05, "epoch": 13.991090146750524, "percentage": 69.96, "elapsed_time": "1:20:33", "remaining_time": "0:34:35", "throughput": 3602.67, "total_tokens": 17413672}
|
| 5353 |
+
{"current_steps": 26700, "total_steps": 38160, "loss": 0.6142, "lr": 1.2525755023334285e-05, "epoch": 13.9937106918239, "percentage": 69.97, "elapsed_time": "1:20:34", "remaining_time": "0:34:34", "throughput": 3602.7, "total_tokens": 17416936}
|
| 5354 |
+
{"current_steps": 26705, "total_steps": 38160, "loss": 0.5109, "lr": 1.251584715556337e-05, "epoch": 13.996331236897275, "percentage": 69.98, "elapsed_time": "1:20:35", "remaining_time": "0:34:34", "throughput": 3602.82, "total_tokens": 17420520}
|
| 5355 |
+
{"current_steps": 26710, "total_steps": 38160, "loss": 0.4349, "lr": 1.2505941899334805e-05, "epoch": 13.99895178197065, "percentage": 69.99, "elapsed_time": "1:20:36", "remaining_time": "0:34:33", "throughput": 3602.95, "total_tokens": 17424840}
|
| 5356 |
+
{"current_steps": 26712, "total_steps": 38160, "eval_loss": 0.46962180733680725, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "1:20:46", "remaining_time": "0:34:37", "throughput": 3595.32, "total_tokens": 17425368}
|
| 5357 |
+
{"current_steps": 26715, "total_steps": 38160, "loss": 0.4798, "lr": 1.2496039256720665e-05, "epoch": 14.001572327044025, "percentage": 70.01, "elapsed_time": "1:20:48", "remaining_time": "0:34:37", "throughput": 3594.3, "total_tokens": 17427448}
|
| 5358 |
+
{"current_steps": 26720, "total_steps": 38160, "loss": 0.5229, "lr": 1.2486139229792452e-05, "epoch": 14.0041928721174, "percentage": 70.02, "elapsed_time": "1:20:49", "remaining_time": "0:34:36", "throughput": 3594.25, "total_tokens": 17430008}
|
| 5359 |
+
{"current_steps": 26725, "total_steps": 38160, "loss": 0.5574, "lr": 1.2476241820621152e-05, "epoch": 14.006813417190775, "percentage": 70.03, "elapsed_time": "1:20:50", "remaining_time": "0:34:35", "throughput": 3594.28, "total_tokens": 17433080}
|