Training in progress, step 38800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
|
|
|
|
|
|
| 1190 |
| Epoch | Step | Training Loss |
|
| 1191 |
|:------:|:-----:|:-------------:|
|
| 1192 |
| 0.5946 | 33650 | 0.2952 |
|
|
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
|
|
| 1289 |
| 0.6803 | 38500 | 0.3343 |
|
| 1290 |
| 0.6812 | 38550 | 0.2411 |
|
| 1291 |
| 0.6821 | 38600 | 0.2641 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
|
|
|
|
| 1293 |
|
| 1294 |
### Framework Versions
|
| 1295 |
- Python: 3.11.13
|
|
|
|
| 1187 |
</details>
|
| 1188 |
|
| 1189 |
### Training Logs
|
| 1190 |
+
<details><summary>Click to expand</summary>
|
| 1191 |
+
|
| 1192 |
| Epoch | Step | Training Loss |
|
| 1193 |
|:------:|:-----:|:-------------:|
|
| 1194 |
| 0.5946 | 33650 | 0.2952 |
|
|
|
|
| 1291 |
| 0.6803 | 38500 | 0.3343 |
|
| 1292 |
| 0.6812 | 38550 | 0.2411 |
|
| 1293 |
| 0.6821 | 38600 | 0.2641 |
|
| 1294 |
+
| 0.6830 | 38650 | 0.3673 |
|
| 1295 |
+
| 0.6839 | 38700 | 0.3456 |
|
| 1296 |
+
| 0.6847 | 38750 | 0.2554 |
|
| 1297 |
+
| 0.6856 | 38800 | 0.2289 |
|
| 1298 |
|
| 1299 |
+
</details>
|
| 1300 |
|
| 1301 |
### Framework Versions
|
| 1302 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ee25825aff4959fe5e0a2781134b9e96da88058dbf8690e35445a67610af73c
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d050884a4252ba97d0bb88532a3164adec318594e47284644fd45a2c9c9a7cc4
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a8540dc3b35a3094fd45d5158b8f89bcbd7079e7ed16edbb02c263246cd8ebc
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64aa6796f2a5b524134346b6756db7d56f573132ef410dc4c37c3f8db2fcd06d
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac6c315739e8675a2e6e8b7f539ac0cd78a1153ea0c4434aeaf8094027721b1e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5412,6 +5412,34 @@
|
|
| 5412 |
"learning_rate": 1.7675875203706975e-05,
|
| 5413 |
"loss": 0.2641,
|
| 5414 |
"step": 38600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5415 |
}
|
| 5416 |
],
|
| 5417 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.6856213885600184,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 38800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5412 |
"learning_rate": 1.7675875203706975e-05,
|
| 5413 |
"loss": 0.2641,
|
| 5414 |
"step": 38600
|
| 5415 |
+
},
|
| 5416 |
+
{
|
| 5417 |
+
"epoch": 0.6829707904083688,
|
| 5418 |
+
"grad_norm": 1.6398324966430664,
|
| 5419 |
+
"learning_rate": 1.762777090573521e-05,
|
| 5420 |
+
"loss": 0.3673,
|
| 5421 |
+
"step": 38650
|
| 5422 |
+
},
|
| 5423 |
+
{
|
| 5424 |
+
"epoch": 0.6838543231255854,
|
| 5425 |
+
"grad_norm": 1.5645078420639038,
|
| 5426 |
+
"learning_rate": 1.7578684887396674e-05,
|
| 5427 |
+
"loss": 0.3456,
|
| 5428 |
+
"step": 38700
|
| 5429 |
+
},
|
| 5430 |
+
{
|
| 5431 |
+
"epoch": 0.6847378558428019,
|
| 5432 |
+
"grad_norm": 1.4957185983657837,
|
| 5433 |
+
"learning_rate": 1.752959886905814e-05,
|
| 5434 |
+
"loss": 0.2554,
|
| 5435 |
+
"step": 38750
|
| 5436 |
+
},
|
| 5437 |
+
{
|
| 5438 |
+
"epoch": 0.6856213885600184,
|
| 5439 |
+
"grad_norm": 1.5689042806625366,
|
| 5440 |
+
"learning_rate": 1.7480512850719603e-05,
|
| 5441 |
+
"loss": 0.2289,
|
| 5442 |
+
"step": 38800
|
| 5443 |
}
|
| 5444 |
],
|
| 5445 |
"logging_steps": 50,
|