Training in progress, step 16400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1351,6 +1351,10 @@ You can finetune this model on your own dataset.
|
|
| 1351 |
| 0.2845 | 16100 | 0.3489 |
|
| 1352 |
| 0.2854 | 16150 | 0.4209 |
|
| 1353 |
| 0.2863 | 16200 | 0.2984 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
|
| 1355 |
</details>
|
| 1356 |
|
|
|
|
| 1351 |
| 0.2845 | 16100 | 0.3489 |
|
| 1352 |
| 0.2854 | 16150 | 0.4209 |
|
| 1353 |
| 0.2863 | 16200 | 0.2984 |
|
| 1354 |
+
| 0.2871 | 16250 | 0.3877 |
|
| 1355 |
+
| 0.2880 | 16300 | 0.3508 |
|
| 1356 |
+
| 0.2889 | 16350 | 0.3443 |
|
| 1357 |
+
| 0.2898 | 16400 | 0.4346 |
|
| 1358 |
|
| 1359 |
</details>
|
| 1360 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72204e50bc6a729a9530dbec2c2b7b58645a458ae9d80bb5e58884f5987fcb41
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:deb1ec3cd79155004f27b637dbdf8248d31df7a7e6b9271ef520a45273a071c7
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0290ac5f04e55c88614a5fc8876370d93a593987208fcc581fc642918b1da8ba
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f9ee0ff1940dd91113eeb7613899018df3dbdd59112c1db2a4ee540aa7411d1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d25210b544040bb154d351b7b54b39ff9c98281c0d9fc772c1d1b0a64f1f30f6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2276,6 +2276,34 @@
|
|
| 2276 |
"learning_rate": 3.96565942157036e-05,
|
| 2277 |
"loss": 0.2984,
|
| 2278 |
"step": 16200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2279 |
}
|
| 2280 |
],
|
| 2281 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.2897987312470181,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 16400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2276 |
"learning_rate": 3.96565942157036e-05,
|
| 2277 |
"loss": 0.2984,
|
| 2278 |
"step": 16200
|
| 2279 |
+
},
|
| 2280 |
+
{
|
| 2281 |
+
"epoch": 0.2871481330953685,
|
| 2282 |
+
"grad_norm": 1.705592155456543,
|
| 2283 |
+
"learning_rate": 3.9607508197365065e-05,
|
| 2284 |
+
"loss": 0.3877,
|
| 2285 |
+
"step": 16250
|
| 2286 |
+
},
|
| 2287 |
+
{
|
| 2288 |
+
"epoch": 0.28803166581258505,
|
| 2289 |
+
"grad_norm": 1.5367944240570068,
|
| 2290 |
+
"learning_rate": 3.955842217902653e-05,
|
| 2291 |
+
"loss": 0.3508,
|
| 2292 |
+
"step": 16300
|
| 2293 |
+
},
|
| 2294 |
+
{
|
| 2295 |
+
"epoch": 0.2889151985298016,
|
| 2296 |
+
"grad_norm": 3.140960693359375,
|
| 2297 |
+
"learning_rate": 3.9509336160687994e-05,
|
| 2298 |
+
"loss": 0.3443,
|
| 2299 |
+
"step": 16350
|
| 2300 |
+
},
|
| 2301 |
+
{
|
| 2302 |
+
"epoch": 0.2897987312470181,
|
| 2303 |
+
"grad_norm": 1.2341272830963135,
|
| 2304 |
+
"learning_rate": 3.946025014234945e-05,
|
| 2305 |
+
"loss": 0.4346,
|
| 2306 |
+
"step": 16400
|
| 2307 |
}
|
| 2308 |
],
|
| 2309 |
"logging_steps": 50,
|