Training in progress, step 31600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
|
|
|
|
|
|
| 1190 |
| Epoch | Step | Training Loss |
|
| 1191 |
|:------:|:-----:|:-------------:|
|
| 1192 |
| 0.4674 | 26450 | 0.3511 |
|
|
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
|
|
| 1289 |
| 0.5531 | 31300 | 0.4005 |
|
| 1290 |
| 0.5540 | 31350 | 0.2993 |
|
| 1291 |
| 0.5549 | 31400 | 0.3463 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
|
|
|
|
| 1293 |
|
| 1294 |
### Framework Versions
|
| 1295 |
- Python: 3.11.13
|
|
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
| 1190 |
+
<details><summary>Click to expand</summary>
|
| 1191 |
+
|
| 1192 |
| Epoch | Step | Training Loss |
|
| 1193 |
|:------:|:-----:|:-------------:|
|
| 1194 |
| 0.4674 | 26450 | 0.3511 |
|
|
|
|
| 1291 |
| 0.5531 | 31300 | 0.4005 |
|
| 1292 |
| 0.5540 | 31350 | 0.2993 |
|
| 1293 |
| 0.5549 | 31400 | 0.3463 |
|
| 1294 |
+
| 0.5557 | 31450 | 0.3654 |
|
| 1295 |
+
| 0.5566 | 31500 | 0.3329 |
|
| 1296 |
+
| 0.5575 | 31550 | 0.2794 |
|
| 1297 |
+
| 0.5584 | 31600 | 0.4189 |
|
| 1298 |
|
| 1299 |
+
</details>
|
| 1300 |
|
| 1301 |
### Framework Versions
|
| 1302 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:037af2d441f69bccfeb282dafbe011432348a180c3a824cdedd50370b13aa3f5
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd0170e06425554bee01ebe42a564ab7255970144350ac51e311ccbc99405281
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a18c03e60472d4b8f510c853a2c7471950a1574efd6c20fb49cc26f42e6d1dd
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c19235faf86fc7b488d049c5de2dd9270a2ad7d80400a9f4692705e68f0f698f
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a676db8a1d42fae190e3de7197b8ccb119e27d8c3e860a70915ea0b98f39452b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4404,6 +4404,34 @@
|
|
| 4404 |
"learning_rate": 2.4741316683355912e-05,
|
| 4405 |
"loss": 0.3463,
|
| 4406 |
"step": 31400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4407 |
}
|
| 4408 |
],
|
| 4409 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5583926772808397,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 31600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4404 |
"learning_rate": 2.4741316683355912e-05,
|
| 4405 |
"loss": 0.3463,
|
| 4406 |
"step": 31400
|
| 4407 |
+
},
|
| 4408 |
+
{
|
| 4409 |
+
"epoch": 0.5557420791291902,
|
| 4410 |
+
"grad_norm": 1.5994555950164795,
|
| 4411 |
+
"learning_rate": 2.4692230665017377e-05,
|
| 4412 |
+
"loss": 0.3654,
|
| 4413 |
+
"step": 31450
|
| 4414 |
+
},
|
| 4415 |
+
{
|
| 4416 |
+
"epoch": 0.5566256118464067,
|
| 4417 |
+
"grad_norm": 1.528947114944458,
|
| 4418 |
+
"learning_rate": 2.4643144646678842e-05,
|
| 4419 |
+
"loss": 0.3329,
|
| 4420 |
+
"step": 31500
|
| 4421 |
+
},
|
| 4422 |
+
{
|
| 4423 |
+
"epoch": 0.5575091445636232,
|
| 4424 |
+
"grad_norm": 1.4391777515411377,
|
| 4425 |
+
"learning_rate": 2.4594058628340306e-05,
|
| 4426 |
+
"loss": 0.2794,
|
| 4427 |
+
"step": 31550
|
| 4428 |
+
},
|
| 4429 |
+
{
|
| 4430 |
+
"epoch": 0.5583926772808397,
|
| 4431 |
+
"grad_norm": 4.419312953948975,
|
| 4432 |
+
"learning_rate": 2.4544972610001768e-05,
|
| 4433 |
+
"loss": 0.4189,
|
| 4434 |
+
"step": 31600
|
| 4435 |
}
|
| 4436 |
],
|
| 4437 |
"logging_steps": 50,
|