Training in progress, epoch 3
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7595ed142582256570235e38def78c1297695e381a6dd1914f8c3fc8b7c32f2e
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bf59cb0181dd5eeadb6bb6956b44a0453ef5435a50d9956ab04a17adf07969c
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d224a95259f724b593ca15484174f79266a34d21b7b364016a0a648aff3f89a
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f19d20136f05fe35a0b594e25e5e7e2f5a3301a9230aa9c01d455b5add5aaeca
|
| 3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
|
@@ -175,3 +175,76 @@
|
|
| 175 |
{"current_steps": 175, "total_steps": 249, "loss": 0.5554, "lr": 2.4596232737673544e-06, "epoch": 2.108433734939759, "percentage": 70.28, "elapsed_time": "0:43:34", "remaining_time": "0:18:25"}
|
| 176 |
{"current_steps": 176, "total_steps": 249, "loss": 0.5792, "lr": 2.3994756886051267e-06, "epoch": 2.1204819277108435, "percentage": 70.68, "elapsed_time": "0:43:48", "remaining_time": "0:18:10"}
|
| 177 |
{"current_steps": 177, "total_steps": 249, "loss": 0.5718, "lr": 2.339839617423318e-06, "epoch": 2.1325301204819276, "percentage": 71.08, "elapsed_time": "0:44:03", "remaining_time": "0:17:55"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
{"current_steps": 175, "total_steps": 249, "loss": 0.5554, "lr": 2.4596232737673544e-06, "epoch": 2.108433734939759, "percentage": 70.28, "elapsed_time": "0:43:34", "remaining_time": "0:18:25"}
|
| 176 |
{"current_steps": 176, "total_steps": 249, "loss": 0.5792, "lr": 2.3994756886051267e-06, "epoch": 2.1204819277108435, "percentage": 70.68, "elapsed_time": "0:43:48", "remaining_time": "0:18:10"}
|
| 177 |
{"current_steps": 177, "total_steps": 249, "loss": 0.5718, "lr": 2.339839617423318e-06, "epoch": 2.1325301204819276, "percentage": 71.08, "elapsed_time": "0:44:03", "remaining_time": "0:17:55"}
|
| 178 |
+
{"current_steps": 178, "total_steps": 249, "loss": 0.5497, "lr": 2.280726790427258e-06, "epoch": 2.144578313253012, "percentage": 71.49, "elapsed_time": "0:44:16", "remaining_time": "0:17:39"}
|
| 179 |
+
{"current_steps": 179, "total_steps": 249, "loss": 0.5524, "lr": 2.2221488349019903e-06, "epoch": 2.1566265060240966, "percentage": 71.89, "elapsed_time": "0:44:31", "remaining_time": "0:17:24"}
|
| 180 |
+
{"current_steps": 180, "total_steps": 249, "loss": 0.5682, "lr": 2.1641172729252206e-06, "epoch": 2.1686746987951806, "percentage": 72.29, "elapsed_time": "0:44:44", "remaining_time": "0:17:09"}
|
| 181 |
+
{"current_steps": 181, "total_steps": 249, "loss": 0.547, "lr": 2.1066435191009717e-06, "epoch": 2.180722891566265, "percentage": 72.69, "elapsed_time": "0:44:54", "remaining_time": "0:16:52"}
|
| 182 |
+
{"current_steps": 182, "total_steps": 249, "loss": 0.5709, "lr": 2.04973887831436e-06, "epoch": 2.1927710843373496, "percentage": 73.09, "elapsed_time": "0:45:09", "remaining_time": "0:16:37"}
|
| 183 |
+
{"current_steps": 183, "total_steps": 249, "loss": 0.5447, "lr": 1.9934145435079705e-06, "epoch": 2.2048192771084336, "percentage": 73.49, "elapsed_time": "0:45:18", "remaining_time": "0:16:20"}
|
| 184 |
+
{"current_steps": 184, "total_steps": 249, "loss": 0.5601, "lr": 1.9376815934802496e-06, "epoch": 2.216867469879518, "percentage": 73.9, "elapsed_time": "0:45:32", "remaining_time": "0:16:05"}
|
| 185 |
+
{"current_steps": 185, "total_steps": 249, "loss": 0.5551, "lr": 1.8825509907063328e-06, "epoch": 2.2289156626506026, "percentage": 74.3, "elapsed_time": "0:45:47", "remaining_time": "0:15:50"}
|
| 186 |
+
{"current_steps": 186, "total_steps": 249, "loss": 0.5688, "lr": 1.8280335791817733e-06, "epoch": 2.2409638554216866, "percentage": 74.7, "elapsed_time": "0:46:02", "remaining_time": "0:15:35"}
|
| 187 |
+
{"current_steps": 187, "total_steps": 249, "loss": 0.581, "lr": 1.7741400822895633e-06, "epoch": 2.253012048192771, "percentage": 75.1, "elapsed_time": "0:46:19", "remaining_time": "0:15:21"}
|
| 188 |
+
{"current_steps": 188, "total_steps": 249, "loss": 0.5641, "lr": 1.7208811006908798e-06, "epoch": 2.2650602409638556, "percentage": 75.5, "elapsed_time": "0:46:34", "remaining_time": "0:15:06"}
|
| 189 |
+
{"current_steps": 189, "total_steps": 249, "loss": 0.5769, "lr": 1.6682671102399806e-06, "epoch": 2.2771084337349397, "percentage": 75.9, "elapsed_time": "0:46:48", "remaining_time": "0:14:51"}
|
| 190 |
+
{"current_steps": 190, "total_steps": 249, "loss": 0.5682, "lr": 1.6163084599236278e-06, "epoch": 2.289156626506024, "percentage": 76.31, "elapsed_time": "0:47:03", "remaining_time": "0:14:36"}
|
| 191 |
+
{"current_steps": 191, "total_steps": 249, "loss": 0.5803, "lr": 1.5650153698254916e-06, "epoch": 2.3012048192771086, "percentage": 76.71, "elapsed_time": "0:47:18", "remaining_time": "0:14:22"}
|
| 192 |
+
{"current_steps": 192, "total_steps": 249, "loss": 0.5466, "lr": 1.514397929115884e-06, "epoch": 2.3132530120481927, "percentage": 77.11, "elapsed_time": "0:47:31", "remaining_time": "0:14:06"}
|
| 193 |
+
{"current_steps": 193, "total_steps": 249, "loss": 0.541, "lr": 1.4644660940672628e-06, "epoch": 2.325301204819277, "percentage": 77.51, "elapsed_time": "0:47:46", "remaining_time": "0:13:51"}
|
| 194 |
+
{"current_steps": 194, "total_steps": 249, "loss": 0.5534, "lr": 1.4152296860958641e-06, "epoch": 2.337349397590361, "percentage": 77.91, "elapsed_time": "0:48:00", "remaining_time": "0:13:36"}
|
| 195 |
+
{"current_steps": 195, "total_steps": 249, "loss": 0.5785, "lr": 1.3666983898298659e-06, "epoch": 2.3493975903614457, "percentage": 78.31, "elapsed_time": "0:48:16", "remaining_time": "0:13:22"}
|
| 196 |
+
{"current_steps": 196, "total_steps": 249, "loss": 0.5618, "lr": 1.3188817512044544e-06, "epoch": 2.36144578313253, "percentage": 78.71, "elapsed_time": "0:48:29", "remaining_time": "0:13:06"}
|
| 197 |
+
{"current_steps": 197, "total_steps": 249, "loss": 0.5613, "lr": 1.2717891755841722e-06, "epoch": 2.3734939759036147, "percentage": 79.12, "elapsed_time": "0:48:45", "remaining_time": "0:12:52"}
|
| 198 |
+
{"current_steps": 198, "total_steps": 249, "loss": 0.5456, "lr": 1.225429925912921e-06, "epoch": 2.3855421686746987, "percentage": 79.52, "elapsed_time": "0:49:00", "remaining_time": "0:12:37"}
|
| 199 |
+
{"current_steps": 199, "total_steps": 249, "loss": 0.5608, "lr": 1.1798131208919628e-06, "epoch": 2.397590361445783, "percentage": 79.92, "elapsed_time": "0:49:15", "remaining_time": "0:12:22"}
|
| 200 |
+
{"current_steps": 200, "total_steps": 249, "loss": 0.5671, "lr": 1.134947733186315e-06, "epoch": 2.4096385542168672, "percentage": 80.32, "elapsed_time": "0:49:32", "remaining_time": "0:12:08"}
|
| 201 |
+
{"current_steps": 201, "total_steps": 249, "loss": 0.5562, "lr": 1.0908425876598512e-06, "epoch": 2.4216867469879517, "percentage": 80.72, "elapsed_time": "0:49:44", "remaining_time": "0:11:52"}
|
| 202 |
+
{"current_steps": 202, "total_steps": 249, "loss": 0.5622, "lr": 1.047506359639483e-06, "epoch": 2.433734939759036, "percentage": 81.12, "elapsed_time": "0:49:58", "remaining_time": "0:11:37"}
|
| 203 |
+
{"current_steps": 203, "total_steps": 249, "loss": 0.5687, "lr": 1.004947573208756e-06, "epoch": 2.4457831325301207, "percentage": 81.53, "elapsed_time": "0:50:13", "remaining_time": "0:11:22"}
|
| 204 |
+
{"current_steps": 204, "total_steps": 249, "loss": 0.57, "lr": 9.631745995311881e-07, "epoch": 2.4578313253012047, "percentage": 81.93, "elapsed_time": "0:50:29", "remaining_time": "0:11:08"}
|
| 205 |
+
{"current_steps": 205, "total_steps": 249, "loss": 0.5688, "lr": 9.221956552036992e-07, "epoch": 2.4698795180722892, "percentage": 82.33, "elapsed_time": "0:50:44", "remaining_time": "0:10:53"}
|
| 206 |
+
{"current_steps": 206, "total_steps": 249, "loss": 0.5438, "lr": 8.820188006404268e-07, "epoch": 2.4819277108433733, "percentage": 82.73, "elapsed_time": "0:50:58", "remaining_time": "0:10:38"}
|
| 207 |
+
{"current_steps": 207, "total_steps": 249, "loss": 0.5878, "lr": 8.426519384872733e-07, "epoch": 2.4939759036144578, "percentage": 83.13, "elapsed_time": "0:51:12", "remaining_time": "0:10:23"}
|
| 208 |
+
{"current_steps": 208, "total_steps": 249, "loss": 0.5353, "lr": 8.041028120674894e-07, "epoch": 2.5060240963855422, "percentage": 83.53, "elapsed_time": "0:51:27", "remaining_time": "0:10:08"}
|
| 209 |
+
{"current_steps": 209, "total_steps": 249, "loss": 0.5614, "lr": 7.663790038585794e-07, "epoch": 2.5180722891566267, "percentage": 83.94, "elapsed_time": "0:51:41", "remaining_time": "0:09:53"}
|
| 210 |
+
{"current_steps": 210, "total_steps": 249, "loss": 0.567, "lr": 7.294879340008632e-07, "epoch": 2.5301204819277108, "percentage": 84.34, "elapsed_time": "0:51:54", "remaining_time": "0:09:38"}
|
| 211 |
+
{"current_steps": 211, "total_steps": 249, "loss": 0.576, "lr": 6.934368588379553e-07, "epoch": 2.5421686746987953, "percentage": 84.74, "elapsed_time": "0:52:10", "remaining_time": "0:09:23"}
|
| 212 |
+
{"current_steps": 212, "total_steps": 249, "loss": 0.5703, "lr": 6.582328694894729e-07, "epoch": 2.5542168674698793, "percentage": 85.14, "elapsed_time": "0:52:24", "remaining_time": "0:09:08"}
|
| 213 |
+
{"current_steps": 213, "total_steps": 249, "loss": 0.5631, "lr": 6.238828904562316e-07, "epoch": 2.566265060240964, "percentage": 85.54, "elapsed_time": "0:52:39", "remaining_time": "0:08:53"}
|
| 214 |
+
{"current_steps": 214, "total_steps": 249, "loss": 0.5625, "lr": 5.903936782582253e-07, "epoch": 2.5783132530120483, "percentage": 85.94, "elapsed_time": "0:52:51", "remaining_time": "0:08:38"}
|
| 215 |
+
{"current_steps": 215, "total_steps": 249, "loss": 0.5504, "lr": 5.577718201056392e-07, "epoch": 2.5903614457831328, "percentage": 86.35, "elapsed_time": "0:53:05", "remaining_time": "0:08:23"}
|
| 216 |
+
{"current_steps": 216, "total_steps": 249, "loss": 0.5566, "lr": 5.260237326031698e-07, "epoch": 2.602409638554217, "percentage": 86.75, "elapsed_time": "0:53:18", "remaining_time": "0:08:08"}
|
| 217 |
+
{"current_steps": 217, "total_steps": 249, "loss": 0.5706, "lr": 4.951556604879049e-07, "epoch": 2.6144578313253013, "percentage": 87.15, "elapsed_time": "0:53:32", "remaining_time": "0:07:53"}
|
| 218 |
+
{"current_steps": 218, "total_steps": 249, "loss": 0.5484, "lr": 4.651736754009972e-07, "epoch": 2.6265060240963853, "percentage": 87.55, "elapsed_time": "0:53:47", "remaining_time": "0:07:38"}
|
| 219 |
+
{"current_steps": 219, "total_steps": 249, "loss": 0.5607, "lr": 4.3608367469340553e-07, "epoch": 2.63855421686747, "percentage": 87.95, "elapsed_time": "0:54:03", "remaining_time": "0:07:24"}
|
| 220 |
+
{"current_steps": 220, "total_steps": 249, "loss": 0.5679, "lr": 4.078913802658946e-07, "epoch": 2.6506024096385543, "percentage": 88.35, "elapsed_time": "0:54:16", "remaining_time": "0:07:09"}
|
| 221 |
+
{"current_steps": 221, "total_steps": 249, "loss": 0.5632, "lr": 3.8060233744356634e-07, "epoch": 2.662650602409639, "percentage": 88.76, "elapsed_time": "0:54:29", "remaining_time": "0:06:54"}
|
| 222 |
+
{"current_steps": 222, "total_steps": 249, "loss": 0.5695, "lr": 3.542219138851094e-07, "epoch": 2.674698795180723, "percentage": 89.16, "elapsed_time": "0:54:43", "remaining_time": "0:06:39"}
|
| 223 |
+
{"current_steps": 223, "total_steps": 249, "loss": 0.5578, "lr": 3.287552985270015e-07, "epoch": 2.6867469879518073, "percentage": 89.56, "elapsed_time": "0:54:54", "remaining_time": "0:06:24"}
|
| 224 |
+
{"current_steps": 224, "total_steps": 249, "loss": 0.5525, "lr": 3.0420750056286195e-07, "epoch": 2.6987951807228914, "percentage": 89.96, "elapsed_time": "0:55:10", "remaining_time": "0:06:09"}
|
| 225 |
+
{"current_steps": 225, "total_steps": 249, "loss": 0.5582, "lr": 2.8058334845816214e-07, "epoch": 2.710843373493976, "percentage": 90.36, "elapsed_time": "0:55:24", "remaining_time": "0:05:54"}
|
| 226 |
+
{"current_steps": 226, "total_steps": 249, "loss": 0.563, "lr": 2.5788748900048676e-07, "epoch": 2.7228915662650603, "percentage": 90.76, "elapsed_time": "0:55:39", "remaining_time": "0:05:39"}
|
| 227 |
+
{"current_steps": 227, "total_steps": 249, "loss": 0.5657, "lr": 2.3612438638551837e-07, "epoch": 2.734939759036145, "percentage": 91.16, "elapsed_time": "0:55:54", "remaining_time": "0:05:25"}
|
| 228 |
+
{"current_steps": 228, "total_steps": 249, "loss": 0.5768, "lr": 2.152983213389559e-07, "epoch": 2.746987951807229, "percentage": 91.57, "elapsed_time": "0:56:09", "remaining_time": "0:05:10"}
|
| 229 |
+
{"current_steps": 229, "total_steps": 249, "loss": 0.5765, "lr": 1.9541339027450256e-07, "epoch": 2.7590361445783134, "percentage": 91.97, "elapsed_time": "0:56:24", "remaining_time": "0:04:55"}
|
| 230 |
+
{"current_steps": 230, "total_steps": 249, "loss": 0.5434, "lr": 1.7647350448812105e-07, "epoch": 2.7710843373493974, "percentage": 92.37, "elapsed_time": "0:56:38", "remaining_time": "0:04:40"}
|
| 231 |
+
{"current_steps": 231, "total_steps": 249, "loss": 0.5644, "lr": 1.5848238938869332e-07, "epoch": 2.783132530120482, "percentage": 92.77, "elapsed_time": "0:56:52", "remaining_time": "0:04:25"}
|
| 232 |
+
{"current_steps": 232, "total_steps": 249, "loss": 0.5711, "lr": 1.4144358376524504e-07, "epoch": 2.7951807228915664, "percentage": 93.17, "elapsed_time": "0:57:06", "remaining_time": "0:04:11"}
|
| 233 |
+
{"current_steps": 233, "total_steps": 249, "loss": 0.5747, "lr": 1.253604390908819e-07, "epoch": 2.807228915662651, "percentage": 93.57, "elapsed_time": "0:57:21", "remaining_time": "0:03:56"}
|
| 234 |
+
{"current_steps": 234, "total_steps": 249, "loss": 0.5741, "lr": 1.10236118863562e-07, "epoch": 2.819277108433735, "percentage": 93.98, "elapsed_time": "0:57:33", "remaining_time": "0:03:41"}
|
| 235 |
+
{"current_steps": 235, "total_steps": 249, "loss": 0.5537, "lr": 9.607359798384785e-08, "epoch": 2.8313253012048194, "percentage": 94.38, "elapsed_time": "0:57:45", "remaining_time": "0:03:26"}
|
| 236 |
+
{"current_steps": 236, "total_steps": 249, "loss": 0.5436, "lr": 8.287566216975795e-08, "epoch": 2.8433734939759034, "percentage": 94.78, "elapsed_time": "0:57:57", "remaining_time": "0:03:11"}
|
| 237 |
+
{"current_steps": 237, "total_steps": 249, "loss": 0.5481, "lr": 7.064490740882057e-08, "epoch": 2.855421686746988, "percentage": 95.18, "elapsed_time": "0:58:11", "remaining_time": "0:02:56"}
|
| 238 |
+
{"current_steps": 238, "total_steps": 249, "loss": 0.5745, "lr": 5.938373944745612e-08, "epoch": 2.8674698795180724, "percentage": 95.58, "elapsed_time": "0:58:24", "remaining_time": "0:02:41"}
|
| 239 |
+
{"current_steps": 239, "total_steps": 249, "loss": 0.569, "lr": 4.909437331777178e-08, "epoch": 2.8795180722891565, "percentage": 95.98, "elapsed_time": "0:58:41", "remaining_time": "0:02:27"}
|
| 240 |
+
{"current_steps": 240, "total_steps": 249, "loss": 0.5433, "lr": 3.977883290187667e-08, "epoch": 2.891566265060241, "percentage": 96.39, "elapsed_time": "0:58:56", "remaining_time": "0:02:12"}
|
| 241 |
+
{"current_steps": 241, "total_steps": 249, "loss": 0.5637, "lr": 3.143895053378698e-08, "epoch": 2.9036144578313254, "percentage": 96.79, "elapsed_time": "0:59:11", "remaining_time": "0:01:57"}
|
| 242 |
+
{"current_steps": 242, "total_steps": 249, "loss": 0.5464, "lr": 2.4076366639015914e-08, "epoch": 2.9156626506024095, "percentage": 97.19, "elapsed_time": "0:59:25", "remaining_time": "0:01:43"}
|
| 243 |
+
{"current_steps": 243, "total_steps": 249, "loss": 0.5313, "lr": 1.769252941190458e-08, "epoch": 2.927710843373494, "percentage": 97.59, "elapsed_time": "0:59:39", "remaining_time": "0:01:28"}
|
| 244 |
+
{"current_steps": 244, "total_steps": 249, "loss": 0.5517, "lr": 1.2288694530769862e-08, "epoch": 2.9397590361445785, "percentage": 97.99, "elapsed_time": "0:59:53", "remaining_time": "0:01:13"}
|
| 245 |
+
{"current_steps": 245, "total_steps": 249, "loss": 0.5535, "lr": 7.865924910916977e-09, "epoch": 2.9518072289156625, "percentage": 98.39, "elapsed_time": "1:00:07", "remaining_time": "0:00:58"}
|
| 246 |
+
{"current_steps": 246, "total_steps": 249, "loss": 0.5445, "lr": 4.4250904955656095e-09, "epoch": 2.963855421686747, "percentage": 98.8, "elapsed_time": "1:00:22", "remaining_time": "0:00:44"}
|
| 247 |
+
{"current_steps": 247, "total_steps": 249, "loss": 0.5569, "lr": 1.9668680847356735e-09, "epoch": 2.9759036144578315, "percentage": 99.2, "elapsed_time": "1:00:37", "remaining_time": "0:00:29"}
|
| 248 |
+
{"current_steps": 248, "total_steps": 249, "loss": 0.5643, "lr": 4.91741202124918e-10, "epoch": 2.9879518072289155, "percentage": 99.6, "elapsed_time": "1:00:51", "remaining_time": "0:00:14"}
|
| 249 |
+
{"current_steps": 249, "total_steps": 249, "loss": 0.5477, "lr": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:01:00", "remaining_time": "0:00:00"}
|
| 250 |
+
{"current_steps": 249, "total_steps": 249, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:03:15", "remaining_time": "0:00:00"}
|