Training in progress, step 800
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18124968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2dc79f36afe51a0672688935070bbcd3c4028604b0d6b6c367642271f00ecd2
|
| 3 |
size 18124968
|
trainer_log.jsonl
CHANGED
|
@@ -121,3 +121,44 @@
|
|
| 121 |
{"current_steps": 595, "total_steps": 40000, "loss": 0.6038, "lr": 4.997279906108211e-05, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:15:59", "remaining_time": "17:39:18", "throughput": 3628.15, "total_tokens": 3481952}
|
| 122 |
{"current_steps": 600, "total_steps": 40000, "loss": 0.7324, "lr": 4.9972339290328155e-05, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:03", "remaining_time": "17:35:00", "throughput": 3642.95, "total_tokens": 3511712}
|
| 123 |
{"current_steps": 600, "total_steps": 40000, "eval_loss": 0.6449815034866333, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:19:48", "remaining_time": "21:40:53", "throughput": 2954.4, "total_tokens": 3511712}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
{"current_steps": 595, "total_steps": 40000, "loss": 0.6038, "lr": 4.997279906108211e-05, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:15:59", "remaining_time": "17:39:18", "throughput": 3628.15, "total_tokens": 3481952}
|
| 122 |
{"current_steps": 600, "total_steps": 40000, "loss": 0.7324, "lr": 4.9972339290328155e-05, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:03", "remaining_time": "17:35:00", "throughput": 3642.95, "total_tokens": 3511712}
|
| 123 |
{"current_steps": 600, "total_steps": 40000, "eval_loss": 0.6449815034866333, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:19:48", "remaining_time": "21:40:53", "throughput": 2954.4, "total_tokens": 3511712}
|
| 124 |
+
{"current_steps": 605, "total_steps": 40000, "loss": 0.6476, "lr": 4.9971875668525646e-05, "epoch": 0.0774598297164074, "percentage": 1.51, "elapsed_time": "0:19:54", "remaining_time": "21:36:40", "throughput": 2963.09, "total_tokens": 3540288}
|
| 125 |
+
{"current_steps": 610, "total_steps": 40000, "loss": 0.7037, "lr": 4.997140819574609e-05, "epoch": 0.07809999359836119, "percentage": 1.52, "elapsed_time": "0:19:59", "remaining_time": "21:30:25", "throughput": 2977.31, "total_tokens": 3569856}
|
| 126 |
+
{"current_steps": 615, "total_steps": 40000, "loss": 0.6651, "lr": 4.997093687206159e-05, "epoch": 0.07874015748031496, "percentage": 1.54, "elapsed_time": "0:20:03", "remaining_time": "21:24:16", "throughput": 2990.84, "total_tokens": 3598720}
|
| 127 |
+
{"current_steps": 620, "total_steps": 40000, "loss": 0.6294, "lr": 4.997046169754482e-05, "epoch": 0.07938032136226875, "percentage": 1.55, "elapsed_time": "0:20:07", "remaining_time": "21:18:13", "throughput": 3004.58, "total_tokens": 3627936}
|
| 128 |
+
{"current_steps": 625, "total_steps": 40000, "loss": 0.6613, "lr": 4.996998267226905e-05, "epoch": 0.08002048524422252, "percentage": 1.56, "elapsed_time": "0:20:11", "remaining_time": "21:12:16", "throughput": 3018.54, "total_tokens": 3657536}
|
| 129 |
+
{"current_steps": 630, "total_steps": 40000, "loss": 0.6997, "lr": 4.996949979630817e-05, "epoch": 0.0806606491261763, "percentage": 1.57, "elapsed_time": "0:20:15", "remaining_time": "21:06:24", "throughput": 3031.63, "total_tokens": 3686176}
|
| 130 |
+
{"current_steps": 635, "total_steps": 40000, "loss": 0.5562, "lr": 4.996901306973663e-05, "epoch": 0.08130081300813008, "percentage": 1.59, "elapsed_time": "0:20:20", "remaining_time": "21:00:45", "throughput": 3045.48, "total_tokens": 3716224}
|
| 131 |
+
{"current_steps": 640, "total_steps": 40000, "loss": 0.6569, "lr": 4.996852249262949e-05, "epoch": 0.08194097689008387, "percentage": 1.6, "elapsed_time": "0:20:24", "remaining_time": "20:55:04", "throughput": 3058.23, "total_tokens": 3744672}
|
| 132 |
+
{"current_steps": 645, "total_steps": 40000, "loss": 0.6224, "lr": 4.996802806506241e-05, "epoch": 0.08258114077203764, "percentage": 1.61, "elapsed_time": "0:20:28", "remaining_time": "20:49:33", "throughput": 3072.41, "total_tokens": 3775232}
|
| 133 |
+
{"current_steps": 650, "total_steps": 40000, "loss": 0.5666, "lr": 4.996752978711164e-05, "epoch": 0.08322130465399143, "percentage": 1.62, "elapsed_time": "0:20:32", "remaining_time": "20:44:03", "throughput": 3085.67, "total_tokens": 3804608}
|
| 134 |
+
{"current_steps": 655, "total_steps": 40000, "loss": 0.5693, "lr": 4.996702765885401e-05, "epoch": 0.0838614685359452, "percentage": 1.64, "elapsed_time": "0:20:37", "remaining_time": "20:38:40", "throughput": 3099.82, "total_tokens": 3835296}
|
| 135 |
+
{"current_steps": 660, "total_steps": 40000, "loss": 0.6508, "lr": 4.9966521680366964e-05, "epoch": 0.08450163241789899, "percentage": 1.65, "elapsed_time": "0:20:41", "remaining_time": "20:33:21", "throughput": 3113.29, "total_tokens": 3865184}
|
| 136 |
+
{"current_steps": 665, "total_steps": 40000, "loss": 0.5583, "lr": 4.9966011851728524e-05, "epoch": 0.08514179629985276, "percentage": 1.66, "elapsed_time": "0:20:45", "remaining_time": "20:28:07", "throughput": 3126.54, "total_tokens": 3894912}
|
| 137 |
+
{"current_steps": 670, "total_steps": 40000, "loss": 0.6213, "lr": 4.996549817301731e-05, "epoch": 0.08578196018180655, "percentage": 1.68, "elapsed_time": "0:20:49", "remaining_time": "20:22:55", "throughput": 3139.13, "total_tokens": 3923840}
|
| 138 |
+
{"current_steps": 675, "total_steps": 40000, "loss": 0.6578, "lr": 4.9964980644312544e-05, "epoch": 0.08642212406376032, "percentage": 1.69, "elapsed_time": "0:20:54", "remaining_time": "20:17:48", "throughput": 3151.82, "total_tokens": 3952992}
|
| 139 |
+
{"current_steps": 680, "total_steps": 40000, "loss": 0.5537, "lr": 4.996445926569403e-05, "epoch": 0.0870622879457141, "percentage": 1.7, "elapsed_time": "0:20:58", "remaining_time": "20:12:47", "throughput": 3164.52, "total_tokens": 3982336}
|
| 140 |
+
{"current_steps": 685, "total_steps": 40000, "loss": 0.6498, "lr": 4.996393403724218e-05, "epoch": 0.08770245182766788, "percentage": 1.71, "elapsed_time": "0:21:02", "remaining_time": "20:07:51", "throughput": 3177.67, "total_tokens": 4012416}
|
| 141 |
+
{"current_steps": 690, "total_steps": 40000, "loss": 0.549, "lr": 4.9963404959037985e-05, "epoch": 0.08834261570962167, "percentage": 1.73, "elapsed_time": "0:21:06", "remaining_time": "20:02:56", "throughput": 3189.59, "total_tokens": 4040864}
|
| 142 |
+
{"current_steps": 695, "total_steps": 40000, "loss": 0.6214, "lr": 4.996287203116303e-05, "epoch": 0.08898277959157544, "percentage": 1.74, "elapsed_time": "0:21:11", "remaining_time": "19:58:06", "throughput": 3202.06, "total_tokens": 4070208}
|
| 143 |
+
{"current_steps": 700, "total_steps": 40000, "loss": 0.6793, "lr": 4.996233525369951e-05, "epoch": 0.08962294347352923, "percentage": 1.75, "elapsed_time": "0:21:15", "remaining_time": "19:53:25", "throughput": 3214.83, "total_tokens": 4100224}
|
| 144 |
+
{"current_steps": 705, "total_steps": 40000, "loss": 0.6036, "lr": 4.99617946267302e-05, "epoch": 0.090263107355483, "percentage": 1.76, "elapsed_time": "0:21:19", "remaining_time": "19:48:42", "throughput": 3226.5, "total_tokens": 4128640}
|
| 145 |
+
{"current_steps": 710, "total_steps": 40000, "loss": 0.658, "lr": 4.996125015033846e-05, "epoch": 0.09090327123743679, "percentage": 1.77, "elapsed_time": "0:21:23", "remaining_time": "19:44:03", "throughput": 3238.7, "total_tokens": 4157888}
|
| 146 |
+
{"current_steps": 715, "total_steps": 40000, "loss": 0.5575, "lr": 4.996070182460827e-05, "epoch": 0.09154343511939056, "percentage": 1.79, "elapsed_time": "0:21:28", "remaining_time": "19:39:29", "throughput": 3250.41, "total_tokens": 4186592}
|
| 147 |
+
{"current_steps": 720, "total_steps": 40000, "loss": 0.5941, "lr": 4.996014964962418e-05, "epoch": 0.09218359900134435, "percentage": 1.8, "elapsed_time": "0:21:32", "remaining_time": "19:35:00", "throughput": 3262.28, "total_tokens": 4215712}
|
| 148 |
+
{"current_steps": 725, "total_steps": 40000, "loss": 0.6708, "lr": 4.9959593625471344e-05, "epoch": 0.09282376288329812, "percentage": 1.81, "elapsed_time": "0:21:36", "remaining_time": "19:30:33", "throughput": 3274.04, "total_tokens": 4244736}
|
| 149 |
+
{"current_steps": 730, "total_steps": 40000, "loss": 0.6292, "lr": 4.995903375223552e-05, "epoch": 0.0934639267652519, "percentage": 1.82, "elapsed_time": "0:21:40", "remaining_time": "19:26:11", "throughput": 3285.9, "total_tokens": 4274048}
|
| 150 |
+
{"current_steps": 735, "total_steps": 40000, "loss": 0.6339, "lr": 4.995847003000302e-05, "epoch": 0.09410409064720568, "percentage": 1.84, "elapsed_time": "0:21:44", "remaining_time": "19:21:51", "throughput": 3296.95, "total_tokens": 4302272}
|
| 151 |
+
{"current_steps": 740, "total_steps": 40000, "loss": 0.5888, "lr": 4.9957902458860804e-05, "epoch": 0.09474425452915947, "percentage": 1.85, "elapsed_time": "0:21:49", "remaining_time": "19:17:35", "throughput": 3308.46, "total_tokens": 4331264}
|
| 152 |
+
{"current_steps": 745, "total_steps": 40000, "loss": 0.5633, "lr": 4.995733103889639e-05, "epoch": 0.09538441841111324, "percentage": 1.86, "elapsed_time": "0:21:53", "remaining_time": "19:13:24", "throughput": 3320.52, "total_tokens": 4361120}
|
| 153 |
+
{"current_steps": 750, "total_steps": 40000, "loss": 0.6356, "lr": 4.99567557701979e-05, "epoch": 0.09602458229306703, "percentage": 1.88, "elapsed_time": "0:21:57", "remaining_time": "19:09:16", "throughput": 3332.69, "total_tokens": 4391264}
|
| 154 |
+
{"current_steps": 755, "total_steps": 40000, "loss": 0.6589, "lr": 4.995617665285403e-05, "epoch": 0.0966647461750208, "percentage": 1.89, "elapsed_time": "0:22:01", "remaining_time": "19:05:11", "throughput": 3344.25, "total_tokens": 4420704}
|
| 155 |
+
{"current_steps": 760, "total_steps": 40000, "loss": 0.6018, "lr": 4.99555936869541e-05, "epoch": 0.09730491005697459, "percentage": 1.9, "elapsed_time": "0:22:06", "remaining_time": "19:01:08", "throughput": 3355.48, "total_tokens": 4449696}
|
| 156 |
+
{"current_steps": 765, "total_steps": 40000, "loss": 0.654, "lr": 4.995500687258803e-05, "epoch": 0.09794507393892836, "percentage": 1.91, "elapsed_time": "0:22:10", "remaining_time": "18:57:08", "throughput": 3366.65, "total_tokens": 4478688}
|
| 157 |
+
{"current_steps": 770, "total_steps": 40000, "loss": 0.6595, "lr": 4.995441620984628e-05, "epoch": 0.09858523782088215, "percentage": 1.93, "elapsed_time": "0:22:14", "remaining_time": "18:53:10", "throughput": 3377.47, "total_tokens": 4507264}
|
| 158 |
+
{"current_steps": 775, "total_steps": 40000, "loss": 0.5934, "lr": 4.995382169881996e-05, "epoch": 0.09922540170283592, "percentage": 1.94, "elapsed_time": "0:22:18", "remaining_time": "18:49:17", "throughput": 3388.28, "total_tokens": 4536000}
|
| 159 |
+
{"current_steps": 780, "total_steps": 40000, "loss": 0.6718, "lr": 4.9953223339600755e-05, "epoch": 0.0998655655847897, "percentage": 1.95, "elapsed_time": "0:22:22", "remaining_time": "18:45:27", "throughput": 3399.91, "total_tokens": 4566016}
|
| 160 |
+
{"current_steps": 785, "total_steps": 40000, "loss": 0.6596, "lr": 4.995262113228091e-05, "epoch": 0.1005057294667435, "percentage": 1.96, "elapsed_time": "0:22:27", "remaining_time": "18:41:42", "throughput": 3411.07, "total_tokens": 4595584}
|
| 161 |
+
{"current_steps": 790, "total_steps": 40000, "loss": 0.5254, "lr": 4.995201507695332e-05, "epoch": 0.10114589334869727, "percentage": 1.98, "elapsed_time": "0:22:31", "remaining_time": "18:38:02", "throughput": 3423.14, "total_tokens": 4626624}
|
| 162 |
+
{"current_steps": 795, "total_steps": 40000, "loss": 0.6161, "lr": 4.995140517371144e-05, "epoch": 0.10178605723065105, "percentage": 1.99, "elapsed_time": "0:22:35", "remaining_time": "18:34:23", "throughput": 3434.53, "total_tokens": 4656704}
|
| 163 |
+
{"current_steps": 800, "total_steps": 40000, "loss": 0.6898, "lr": 4.995079142264932e-05, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:22:40", "remaining_time": "18:30:44", "throughput": 3445.76, "total_tokens": 4686560}
|
| 164 |
+
{"current_steps": 800, "total_steps": 40000, "eval_loss": 0.6095894575119019, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:26:28", "remaining_time": "21:37:03", "throughput": 2950.8, "total_tokens": 4686560}
|