Training in progress, step 45800
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c7198e870e3e15df28e8e060edf9e9dcb780cefd863c23681f9bb2ee59e4831
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -9348,3 +9348,44 @@
|
|
| 9348 |
{"current_steps": 45600, "total_steps": 80000, "eval_loss": 5.0990753173828125, "epoch": 0.3648934127136547, "percentage": 57.0, "elapsed_time": "1 day, 23:34:44", "remaining_time": "1 day, 11:53:34", "throughput": 84.29, "total_tokens": 14437616}
|
| 9349 |
{"current_steps": 45605, "total_steps": 80000, "loss": 6.9887, "lr": 0.11725551969732467, "epoch": 0.3649334229562768, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:53:02", "throughput": 84.3, "total_tokens": 14439176}
|
| 9350 |
{"current_steps": 45610, "total_steps": 80000, "loss": 4.5815, "lr": 0.11722677821706107, "epoch": 0.3649734331988989, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:52:30", "throughput": 84.31, "total_tokens": 14440800}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9348 |
{"current_steps": 45600, "total_steps": 80000, "eval_loss": 5.0990753173828125, "epoch": 0.3648934127136547, "percentage": 57.0, "elapsed_time": "1 day, 23:34:44", "remaining_time": "1 day, 11:53:34", "throughput": 84.29, "total_tokens": 14437616}
|
| 9349 |
{"current_steps": 45605, "total_steps": 80000, "loss": 6.9887, "lr": 0.11725551969732467, "epoch": 0.3649334229562768, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:53:02", "throughput": 84.3, "total_tokens": 14439176}
|
| 9350 |
{"current_steps": 45610, "total_steps": 80000, "loss": 4.5815, "lr": 0.11722677821706107, "epoch": 0.3649734331988989, "percentage": 57.01, "elapsed_time": "1 day, 23:34:46", "remaining_time": "1 day, 11:52:30", "throughput": 84.31, "total_tokens": 14440800}
|
| 9351 |
+
{"current_steps": 45615, "total_steps": 80000, "loss": 5.0775, "lr": 0.11719803800030815, "epoch": 0.36501344344152103, "percentage": 57.02, "elapsed_time": "1 day, 23:34:47", "remaining_time": "1 day, 11:51:57", "throughput": 84.32, "total_tokens": 14442360}
|
| 9352 |
+
{"current_steps": 45620, "total_steps": 80000, "loss": 4.6495, "lr": 0.11716929904817393, "epoch": 0.36505345368414316, "percentage": 57.03, "elapsed_time": "1 day, 23:34:47", "remaining_time": "1 day, 11:51:25", "throughput": 84.32, "total_tokens": 14443800}
|
| 9353 |
+
{"current_steps": 45625, "total_steps": 80000, "loss": 4.5596, "lr": 0.11714056136176634, "epoch": 0.3650934639267652, "percentage": 57.03, "elapsed_time": "1 day, 23:34:48", "remaining_time": "1 day, 11:50:52", "throughput": 84.33, "total_tokens": 14445352}
|
| 9354 |
+
{"current_steps": 45630, "total_steps": 80000, "loss": 5.6308, "lr": 0.11711182494219341, "epoch": 0.36513347416938735, "percentage": 57.04, "elapsed_time": "1 day, 23:34:48", "remaining_time": "1 day, 11:50:20", "throughput": 84.34, "total_tokens": 14446984}
|
| 9355 |
+
{"current_steps": 45635, "total_steps": 80000, "loss": 6.0331, "lr": 0.11708308979056296, "epoch": 0.3651734844120095, "percentage": 57.04, "elapsed_time": "1 day, 23:34:49", "remaining_time": "1 day, 11:49:47", "throughput": 84.35, "total_tokens": 14448576}
|
| 9356 |
+
{"current_steps": 45640, "total_steps": 80000, "loss": 4.4552, "lr": 0.11705435590798277, "epoch": 0.3652134946546316, "percentage": 57.05, "elapsed_time": "1 day, 23:34:50", "remaining_time": "1 day, 11:49:15", "throughput": 84.36, "total_tokens": 14450224}
|
| 9357 |
+
{"current_steps": 45645, "total_steps": 80000, "loss": 4.9775, "lr": 0.11702562329556072, "epoch": 0.3652535048972537, "percentage": 57.06, "elapsed_time": "1 day, 23:34:50", "remaining_time": "1 day, 11:48:43", "throughput": 84.37, "total_tokens": 14451808}
|
| 9358 |
+
{"current_steps": 45650, "total_steps": 80000, "loss": 4.4322, "lr": 0.11699689195440455, "epoch": 0.3652935151398758, "percentage": 57.06, "elapsed_time": "1 day, 23:34:51", "remaining_time": "1 day, 11:48:10", "throughput": 84.38, "total_tokens": 14453376}
|
| 9359 |
+
{"current_steps": 45655, "total_steps": 80000, "loss": 4.7887, "lr": 0.11696816188562179, "epoch": 0.3653335253824979, "percentage": 57.07, "elapsed_time": "1 day, 23:34:51", "remaining_time": "1 day, 11:47:38", "throughput": 84.39, "total_tokens": 14455088}
|
| 9360 |
+
{"current_steps": 45660, "total_steps": 80000, "loss": 3.2378, "lr": 0.11693943309032023, "epoch": 0.36537353562512004, "percentage": 57.07, "elapsed_time": "1 day, 23:34:52", "remaining_time": "1 day, 11:47:05", "throughput": 84.4, "total_tokens": 14456896}
|
| 9361 |
+
{"current_steps": 45665, "total_steps": 80000, "loss": 4.2802, "lr": 0.11691070556960743, "epoch": 0.36541354586774216, "percentage": 57.08, "elapsed_time": "1 day, 23:34:53", "remaining_time": "1 day, 11:46:33", "throughput": 84.41, "total_tokens": 14458496}
|
| 9362 |
+
{"current_steps": 45670, "total_steps": 80000, "loss": 5.4537, "lr": 0.11688197932459085, "epoch": 0.36545355611036423, "percentage": 57.09, "elapsed_time": "1 day, 23:34:53", "remaining_time": "1 day, 11:46:00", "throughput": 84.42, "total_tokens": 14460072}
|
| 9363 |
+
{"current_steps": 45675, "total_steps": 80000, "loss": 4.0324, "lr": 0.11685325435637808, "epoch": 0.36549356635298635, "percentage": 57.09, "elapsed_time": "1 day, 23:34:54", "remaining_time": "1 day, 11:45:28", "throughput": 84.43, "total_tokens": 14461656}
|
| 9364 |
+
{"current_steps": 45680, "total_steps": 80000, "loss": 6.8163, "lr": 0.11682453066607645, "epoch": 0.3655335765956085, "percentage": 57.1, "elapsed_time": "1 day, 23:34:54", "remaining_time": "1 day, 11:44:56", "throughput": 84.44, "total_tokens": 14463576}
|
| 9365 |
+
{"current_steps": 45685, "total_steps": 80000, "loss": 5.0675, "lr": 0.11679580825479345, "epoch": 0.3655735868382306, "percentage": 57.11, "elapsed_time": "1 day, 23:34:55", "remaining_time": "1 day, 11:44:23", "throughput": 84.45, "total_tokens": 14465152}
|
| 9366 |
+
{"current_steps": 45690, "total_steps": 80000, "loss": 5.169, "lr": 0.11676708712363633, "epoch": 0.3656135970808527, "percentage": 57.11, "elapsed_time": "1 day, 23:34:55", "remaining_time": "1 day, 11:43:51", "throughput": 84.45, "total_tokens": 14466600}
|
| 9367 |
+
{"current_steps": 45695, "total_steps": 80000, "loss": 5.3161, "lr": 0.11673836727371245, "epoch": 0.3656536073234748, "percentage": 57.12, "elapsed_time": "1 day, 23:34:56", "remaining_time": "1 day, 11:43:18", "throughput": 84.46, "total_tokens": 14468200}
|
| 9368 |
+
{"current_steps": 45700, "total_steps": 80000, "loss": 5.391, "lr": 0.11670964870612904, "epoch": 0.3656936175660969, "percentage": 57.12, "elapsed_time": "1 day, 23:34:57", "remaining_time": "1 day, 11:42:46", "throughput": 84.47, "total_tokens": 14469704}
|
| 9369 |
+
{"current_steps": 45705, "total_steps": 80000, "loss": 3.8915, "lr": 0.1166809314219932, "epoch": 0.36573362780871904, "percentage": 57.13, "elapsed_time": "1 day, 23:34:57", "remaining_time": "1 day, 11:42:14", "throughput": 84.48, "total_tokens": 14471248}
|
| 9370 |
+
{"current_steps": 45710, "total_steps": 80000, "loss": 6.5092, "lr": 0.11665221542241219, "epoch": 0.36577363805134117, "percentage": 57.14, "elapsed_time": "1 day, 23:34:58", "remaining_time": "1 day, 11:41:41", "throughput": 84.49, "total_tokens": 14472984}
|
| 9371 |
+
{"current_steps": 45715, "total_steps": 80000, "loss": 5.3554, "lr": 0.11662350070849307, "epoch": 0.36581364829396323, "percentage": 57.14, "elapsed_time": "1 day, 23:34:58", "remaining_time": "1 day, 11:41:09", "throughput": 84.5, "total_tokens": 14474504}
|
| 9372 |
+
{"current_steps": 45720, "total_steps": 80000, "loss": 5.8921, "lr": 0.11659478728134282, "epoch": 0.36585365853658536, "percentage": 57.15, "elapsed_time": "1 day, 23:34:59", "remaining_time": "1 day, 11:40:37", "throughput": 84.51, "total_tokens": 14476016}
|
| 9373 |
+
{"current_steps": 45725, "total_steps": 80000, "loss": 5.3527, "lr": 0.11656607514206851, "epoch": 0.3658936687792075, "percentage": 57.16, "elapsed_time": "1 day, 23:35:00", "remaining_time": "1 day, 11:40:04", "throughput": 84.52, "total_tokens": 14477640}
|
| 9374 |
+
{"current_steps": 45730, "total_steps": 80000, "loss": 5.711, "lr": 0.11653736429177709, "epoch": 0.3659336790218296, "percentage": 57.16, "elapsed_time": "1 day, 23:35:00", "remaining_time": "1 day, 11:39:32", "throughput": 84.53, "total_tokens": 14479200}
|
| 9375 |
+
{"current_steps": 45735, "total_steps": 80000, "loss": 5.2179, "lr": 0.11650865473157537, "epoch": 0.3659736892644517, "percentage": 57.17, "elapsed_time": "1 day, 23:35:01", "remaining_time": "1 day, 11:39:00", "throughput": 84.53, "total_tokens": 14480824}
|
| 9376 |
+
{"current_steps": 45740, "total_steps": 80000, "loss": 4.8279, "lr": 0.11647994646257025, "epoch": 0.3660136995070738, "percentage": 57.17, "elapsed_time": "1 day, 23:35:01", "remaining_time": "1 day, 11:38:27", "throughput": 84.54, "total_tokens": 14482568}
|
| 9377 |
+
{"current_steps": 45745, "total_steps": 80000, "loss": 4.1234, "lr": 0.11645123948586855, "epoch": 0.3660537097496959, "percentage": 57.18, "elapsed_time": "1 day, 23:35:02", "remaining_time": "1 day, 11:37:55", "throughput": 84.55, "total_tokens": 14484040}
|
| 9378 |
+
{"current_steps": 45750, "total_steps": 80000, "loss": 4.5525, "lr": 0.11642253380257694, "epoch": 0.36609371999231805, "percentage": 57.19, "elapsed_time": "1 day, 23:35:02", "remaining_time": "1 day, 11:37:23", "throughput": 84.56, "total_tokens": 14485472}
|
| 9379 |
+
{"current_steps": 45755, "total_steps": 80000, "loss": 4.7924, "lr": 0.11639382941380219, "epoch": 0.36613373023494017, "percentage": 57.19, "elapsed_time": "1 day, 23:35:03", "remaining_time": "1 day, 11:36:50", "throughput": 84.57, "total_tokens": 14487016}
|
| 9380 |
+
{"current_steps": 45760, "total_steps": 80000, "loss": 5.106, "lr": 0.11636512632065091, "epoch": 0.36617374047756224, "percentage": 57.2, "elapsed_time": "1 day, 23:35:04", "remaining_time": "1 day, 11:36:18", "throughput": 84.58, "total_tokens": 14488536}
|
| 9381 |
+
{"current_steps": 45765, "total_steps": 80000, "loss": 5.2514, "lr": 0.1163364245242297, "epoch": 0.36621375072018436, "percentage": 57.21, "elapsed_time": "1 day, 23:35:04", "remaining_time": "1 day, 11:35:46", "throughput": 84.59, "total_tokens": 14490112}
|
| 9382 |
+
{"current_steps": 45770, "total_steps": 80000, "loss": 5.9074, "lr": 0.11630772402564503, "epoch": 0.3662537609628065, "percentage": 57.21, "elapsed_time": "1 day, 23:35:05", "remaining_time": "1 day, 11:35:14", "throughput": 84.6, "total_tokens": 14491688}
|
| 9383 |
+
{"current_steps": 45775, "total_steps": 80000, "loss": 5.6634, "lr": 0.11627902482600351, "epoch": 0.3662937712054286, "percentage": 57.22, "elapsed_time": "1 day, 23:35:05", "remaining_time": "1 day, 11:34:41", "throughput": 84.61, "total_tokens": 14493376}
|
| 9384 |
+
{"current_steps": 45780, "total_steps": 80000, "loss": 5.2131, "lr": 0.11625032692641156, "epoch": 0.3663337814480507, "percentage": 57.23, "elapsed_time": "1 day, 23:35:06", "remaining_time": "1 day, 11:34:09", "throughput": 84.61, "total_tokens": 14494880}
|
| 9385 |
+
{"current_steps": 45785, "total_steps": 80000, "loss": 5.1533, "lr": 0.11622163032797546, "epoch": 0.3663737916906728, "percentage": 57.23, "elapsed_time": "1 day, 23:35:06", "remaining_time": "1 day, 11:33:37", "throughput": 84.62, "total_tokens": 14496584}
|
| 9386 |
+
{"current_steps": 45790, "total_steps": 80000, "loss": 5.7625, "lr": 0.11619293503180173, "epoch": 0.3664138019332949, "percentage": 57.24, "elapsed_time": "1 day, 23:35:07", "remaining_time": "1 day, 11:33:04", "throughput": 84.63, "total_tokens": 14498144}
|
| 9387 |
+
{"current_steps": 45795, "total_steps": 80000, "loss": 5.3461, "lr": 0.11616424103899656, "epoch": 0.36645381217591705, "percentage": 57.24, "elapsed_time": "1 day, 23:35:08", "remaining_time": "1 day, 11:32:32", "throughput": 84.64, "total_tokens": 14499664}
|
| 9388 |
+
{"current_steps": 45800, "total_steps": 80000, "loss": 6.2408, "lr": 0.11613554835066617, "epoch": 0.3664938224185392, "percentage": 57.25, "elapsed_time": "1 day, 23:35:08", "remaining_time": "1 day, 11:32:00", "throughput": 84.65, "total_tokens": 14501416}
|
| 9389 |
+
{"current_steps": 45800, "total_steps": 80000, "eval_loss": 5.108764171600342, "epoch": 0.3664938224185392, "percentage": 57.25, "elapsed_time": "1 day, 23:47:15", "remaining_time": "1 day, 11:41:03", "throughput": 84.29, "total_tokens": 14501416}
|
| 9390 |
+
{"current_steps": 45805, "total_steps": 80000, "loss": 4.4077, "lr": 0.11610685696791685, "epoch": 0.36653383266116124, "percentage": 57.26, "elapsed_time": "1 day, 23:47:17", "remaining_time": "1 day, 11:40:31", "throughput": 84.3, "total_tokens": 14503056}
|
| 9391 |
+
{"current_steps": 45810, "total_steps": 80000, "loss": 4.0565, "lr": 0.11607816689185468, "epoch": 0.36657384290378336, "percentage": 57.26, "elapsed_time": "1 day, 23:47:18", "remaining_time": "1 day, 11:39:59", "throughput": 84.31, "total_tokens": 14504704}
|