Training in progress, step 800
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 460928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5c3398046c8780dbc3bf7ef3e77c507c682a4e4102f4cd20f94e7c0d00fb67f
|
| 3 |
size 460928
|
trainer_log.jsonl
CHANGED
|
@@ -121,3 +121,44 @@
|
|
| 121 |
{"current_steps": 595, "total_steps": 40000, "loss": 1.0442, "lr": 0.29983679436649263, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:12:42", "remaining_time": "14:01:45", "throughput": 4565.78, "total_tokens": 3481952}
|
| 122 |
{"current_steps": 600, "total_steps": 40000, "loss": 1.1743, "lr": 0.2998340357419689, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:12:45", "remaining_time": "13:57:57", "throughput": 4586.59, "total_tokens": 3511712}
|
| 123 |
{"current_steps": 600, "total_steps": 40000, "eval_loss": 1.051778793334961, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:06", "remaining_time": "17:37:35", "throughput": 3634.09, "total_tokens": 3511712}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
{"current_steps": 595, "total_steps": 40000, "loss": 1.0442, "lr": 0.29983679436649263, "epoch": 0.07617950195249984, "percentage": 1.49, "elapsed_time": "0:12:42", "remaining_time": "14:01:45", "throughput": 4565.78, "total_tokens": 3481952}
|
| 122 |
{"current_steps": 600, "total_steps": 40000, "loss": 1.1743, "lr": 0.2998340357419689, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:12:45", "remaining_time": "13:57:57", "throughput": 4586.59, "total_tokens": 3511712}
|
| 123 |
{"current_steps": 600, "total_steps": 40000, "eval_loss": 1.051778793334961, "epoch": 0.07681966583445363, "percentage": 1.5, "elapsed_time": "0:16:06", "remaining_time": "17:37:35", "throughput": 3634.09, "total_tokens": 3511712}
|
| 124 |
+
{"current_steps": 605, "total_steps": 40000, "loss": 1.0642, "lr": 0.29983125401115385, "epoch": 0.0774598297164074, "percentage": 1.51, "elapsed_time": "0:16:10", "remaining_time": "17:33:30", "throughput": 3647.02, "total_tokens": 3540288}
|
| 125 |
+
{"current_steps": 610, "total_steps": 40000, "loss": 0.933, "lr": 0.29982844917447654, "epoch": 0.07809999359836119, "percentage": 1.52, "elapsed_time": "0:16:13", "remaining_time": "17:27:57", "throughput": 3666.17, "total_tokens": 3569856}
|
| 126 |
+
{"current_steps": 615, "total_steps": 40000, "loss": 0.991, "lr": 0.2998256212323695, "epoch": 0.07874015748031496, "percentage": 1.54, "elapsed_time": "0:16:16", "remaining_time": "17:22:27", "throughput": 3684.64, "total_tokens": 3598720}
|
| 127 |
+
{"current_steps": 620, "total_steps": 40000, "loss": 1.0314, "lr": 0.29982277018526887, "epoch": 0.07938032136226875, "percentage": 1.55, "elapsed_time": "0:16:19", "remaining_time": "17:17:02", "throughput": 3703.33, "total_tokens": 3627936}
|
| 128 |
+
{"current_steps": 625, "total_steps": 40000, "loss": 1.0803, "lr": 0.2998198960336143, "epoch": 0.08002048524422252, "percentage": 1.56, "elapsed_time": "0:16:22", "remaining_time": "17:11:45", "throughput": 3722.2, "total_tokens": 3657536}
|
| 129 |
+
{"current_steps": 630, "total_steps": 40000, "loss": 0.9903, "lr": 0.299816998777849, "epoch": 0.0806606491261763, "percentage": 1.57, "elapsed_time": "0:16:25", "remaining_time": "17:06:29", "throughput": 3740.22, "total_tokens": 3686176}
|
| 130 |
+
{"current_steps": 635, "total_steps": 40000, "loss": 0.9363, "lr": 0.2998140784184197, "epoch": 0.08130081300813008, "percentage": 1.59, "elapsed_time": "0:16:28", "remaining_time": "17:01:25", "throughput": 3759.08, "total_tokens": 3716224}
|
| 131 |
+
{"current_steps": 640, "total_steps": 40000, "loss": 1.1208, "lr": 0.2998111349557769, "epoch": 0.08194097689008387, "percentage": 1.6, "elapsed_time": "0:16:31", "remaining_time": "16:56:19", "throughput": 3776.63, "total_tokens": 3744672}
|
| 132 |
+
{"current_steps": 645, "total_steps": 40000, "loss": 0.978, "lr": 0.29980816839037444, "epoch": 0.08258114077203764, "percentage": 1.61, "elapsed_time": "0:16:34", "remaining_time": "16:51:26", "throughput": 3795.67, "total_tokens": 3775232}
|
| 133 |
+
{"current_steps": 650, "total_steps": 40000, "loss": 1.035, "lr": 0.2998051787226698, "epoch": 0.08322130465399143, "percentage": 1.62, "elapsed_time": "0:16:37", "remaining_time": "16:46:32", "throughput": 3813.82, "total_tokens": 3804608}
|
| 134 |
+
{"current_steps": 655, "total_steps": 40000, "loss": 0.9583, "lr": 0.29980216595312403, "epoch": 0.0838614685359452, "percentage": 1.64, "elapsed_time": "0:16:40", "remaining_time": "16:41:46", "throughput": 3832.89, "total_tokens": 3835296}
|
| 135 |
+
{"current_steps": 660, "total_steps": 40000, "loss": 0.9998, "lr": 0.29979913008220177, "epoch": 0.08450163241789899, "percentage": 1.65, "elapsed_time": "0:16:43", "remaining_time": "16:37:01", "throughput": 3851.27, "total_tokens": 3865184}
|
| 136 |
+
{"current_steps": 665, "total_steps": 40000, "loss": 0.9844, "lr": 0.2997960711103711, "epoch": 0.08514179629985276, "percentage": 1.66, "elapsed_time": "0:16:46", "remaining_time": "16:32:22", "throughput": 3869.27, "total_tokens": 3894912}
|
| 137 |
+
{"current_steps": 670, "total_steps": 40000, "loss": 0.9351, "lr": 0.29979298903810386, "epoch": 0.08578196018180655, "percentage": 1.68, "elapsed_time": "0:16:49", "remaining_time": "16:27:43", "throughput": 3886.63, "total_tokens": 3923840}
|
| 138 |
+
{"current_steps": 675, "total_steps": 40000, "loss": 1.1098, "lr": 0.29978988386587524, "epoch": 0.08642212406376032, "percentage": 1.69, "elapsed_time": "0:16:52", "remaining_time": "16:23:10", "throughput": 3904.02, "total_tokens": 3952992}
|
| 139 |
+
{"current_steps": 680, "total_steps": 40000, "loss": 0.8682, "lr": 0.2997867555941642, "epoch": 0.0870622879457141, "percentage": 1.7, "elapsed_time": "0:16:55", "remaining_time": "16:18:40", "throughput": 3921.49, "total_tokens": 3982336}
|
| 140 |
+
{"current_steps": 685, "total_steps": 40000, "loss": 0.944, "lr": 0.299783604223453, "epoch": 0.08770245182766788, "percentage": 1.71, "elapsed_time": "0:16:58", "remaining_time": "16:14:18", "throughput": 3939.37, "total_tokens": 4012416}
|
| 141 |
+
{"current_steps": 690, "total_steps": 40000, "loss": 0.8365, "lr": 0.29978042975422786, "epoch": 0.08834261570962167, "percentage": 1.73, "elapsed_time": "0:17:01", "remaining_time": "16:09:54", "throughput": 3955.9, "total_tokens": 4040864}
|
| 142 |
+
{"current_steps": 695, "total_steps": 40000, "loss": 0.8845, "lr": 0.29977723218697816, "epoch": 0.08898277959157544, "percentage": 1.74, "elapsed_time": "0:17:04", "remaining_time": "16:05:38", "throughput": 3972.96, "total_tokens": 4070208}
|
| 143 |
+
{"current_steps": 700, "total_steps": 40000, "loss": 1.1288, "lr": 0.299774011522197, "epoch": 0.08962294347352923, "percentage": 1.75, "elapsed_time": "0:17:07", "remaining_time": "16:01:26", "throughput": 3990.5, "total_tokens": 4100224}
|
| 144 |
+
{"current_steps": 705, "total_steps": 40000, "loss": 0.9551, "lr": 0.29977076776038114, "epoch": 0.090263107355483, "percentage": 1.76, "elapsed_time": "0:17:10", "remaining_time": "15:57:13", "throughput": 4006.74, "total_tokens": 4128640}
|
| 145 |
+
{"current_steps": 710, "total_steps": 40000, "loss": 1.0232, "lr": 0.2997675009020307, "epoch": 0.09090327123743679, "percentage": 1.77, "elapsed_time": "0:17:13", "remaining_time": "15:53:06", "throughput": 4023.48, "total_tokens": 4157888}
|
| 146 |
+
{"current_steps": 715, "total_steps": 40000, "loss": 0.883, "lr": 0.2997642109476496, "epoch": 0.09154343511939056, "percentage": 1.79, "elapsed_time": "0:17:16", "remaining_time": "15:49:02", "throughput": 4039.69, "total_tokens": 4186592}
|
| 147 |
+
{"current_steps": 720, "total_steps": 40000, "loss": 0.9275, "lr": 0.299760897897745, "epoch": 0.09218359900134435, "percentage": 1.8, "elapsed_time": "0:17:19", "remaining_time": "15:45:00", "throughput": 4056.21, "total_tokens": 4215712}
|
| 148 |
+
{"current_steps": 725, "total_steps": 40000, "loss": 0.971, "lr": 0.29975756175282803, "epoch": 0.09282376288329812, "percentage": 1.81, "elapsed_time": "0:17:22", "remaining_time": "15:41:02", "throughput": 4072.57, "total_tokens": 4244736}
|
| 149 |
+
{"current_steps": 730, "total_steps": 40000, "loss": 0.8922, "lr": 0.29975420251341306, "epoch": 0.0934639267652519, "percentage": 1.82, "elapsed_time": "0:17:25", "remaining_time": "15:37:09", "throughput": 4088.96, "total_tokens": 4274048}
|
| 150 |
+
{"current_steps": 735, "total_steps": 40000, "loss": 0.9909, "lr": 0.29975082018001814, "epoch": 0.09410409064720568, "percentage": 1.84, "elapsed_time": "0:17:28", "remaining_time": "15:33:15", "throughput": 4104.51, "total_tokens": 4302272}
|
| 151 |
+
{"current_steps": 740, "total_steps": 40000, "loss": 0.8478, "lr": 0.2997474147531648, "epoch": 0.09474425452915947, "percentage": 1.85, "elapsed_time": "0:17:31", "remaining_time": "15:29:27", "throughput": 4120.53, "total_tokens": 4331264}
|
| 152 |
+
{"current_steps": 745, "total_steps": 40000, "loss": 0.8995, "lr": 0.29974398623337833, "epoch": 0.09538441841111324, "percentage": 1.86, "elapsed_time": "0:17:34", "remaining_time": "15:25:45", "throughput": 4137.06, "total_tokens": 4361120}
|
| 153 |
+
{"current_steps": 750, "total_steps": 40000, "loss": 1.0509, "lr": 0.2997405346211873, "epoch": 0.09602458229306703, "percentage": 1.88, "elapsed_time": "0:17:37", "remaining_time": "15:22:05", "throughput": 4153.8, "total_tokens": 4391264}
|
| 154 |
+
{"current_steps": 755, "total_steps": 40000, "loss": 0.8757, "lr": 0.2997370599171241, "epoch": 0.0966647461750208, "percentage": 1.89, "elapsed_time": "0:17:40", "remaining_time": "15:18:27", "throughput": 4169.86, "total_tokens": 4420704}
|
| 155 |
+
{"current_steps": 760, "total_steps": 40000, "loss": 0.9997, "lr": 0.2997335621217246, "epoch": 0.09730491005697459, "percentage": 1.9, "elapsed_time": "0:17:43", "remaining_time": "15:14:50", "throughput": 4185.52, "total_tokens": 4449696}
|
| 156 |
+
{"current_steps": 765, "total_steps": 40000, "loss": 0.9914, "lr": 0.29973004123552816, "epoch": 0.09794507393892836, "percentage": 1.91, "elapsed_time": "0:17:46", "remaining_time": "15:11:16", "throughput": 4201.1, "total_tokens": 4478688}
|
| 157 |
+
{"current_steps": 770, "total_steps": 40000, "loss": 0.9542, "lr": 0.2997264972590777, "epoch": 0.09858523782088215, "percentage": 1.93, "elapsed_time": "0:17:49", "remaining_time": "15:07:44", "throughput": 4216.26, "total_tokens": 4507264}
|
| 158 |
+
{"current_steps": 775, "total_steps": 40000, "loss": 0.8846, "lr": 0.29972293019291973, "epoch": 0.09922540170283592, "percentage": 1.94, "elapsed_time": "0:17:51", "remaining_time": "15:04:14", "throughput": 4231.51, "total_tokens": 4536000}
|
| 159 |
+
{"current_steps": 780, "total_steps": 40000, "loss": 0.9268, "lr": 0.2997193400376045, "epoch": 0.0998655655847897, "percentage": 1.95, "elapsed_time": "0:17:54", "remaining_time": "15:00:51", "throughput": 4247.62, "total_tokens": 4566016}
|
| 160 |
+
{"current_steps": 785, "total_steps": 40000, "loss": 1.0586, "lr": 0.2997157267936854, "epoch": 0.1005057294667435, "percentage": 1.96, "elapsed_time": "0:17:57", "remaining_time": "14:57:29", "throughput": 4263.28, "total_tokens": 4595584}
|
| 161 |
+
{"current_steps": 790, "total_steps": 40000, "loss": 0.9325, "lr": 0.2997120904617199, "epoch": 0.10114589334869727, "percentage": 1.98, "elapsed_time": "0:18:01", "remaining_time": "14:54:14", "throughput": 4279.87, "total_tokens": 4626624}
|
| 162 |
+
{"current_steps": 795, "total_steps": 40000, "loss": 0.8653, "lr": 0.29970843104226863, "epoch": 0.10178605723065105, "percentage": 1.99, "elapsed_time": "0:18:04", "remaining_time": "14:50:59", "throughput": 4295.65, "total_tokens": 4656704}
|
| 163 |
+
{"current_steps": 800, "total_steps": 40000, "loss": 0.9377, "lr": 0.2997047485358959, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:18:07", "remaining_time": "14:47:45", "throughput": 4311.27, "total_tokens": 4686560}
|
| 164 |
+
{"current_steps": 800, "total_steps": 40000, "eval_loss": 0.9072315096855164, "epoch": 0.10242622111260483, "percentage": 2.0, "elapsed_time": "0:21:27", "remaining_time": "17:31:46", "throughput": 3638.95, "total_tokens": 4686560}
|