Training in progress, step 22500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:596c1e3afdd59a5f888e2e097f32bc12312812a5874bc9ba56a6a84a477bb16a
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5db73359845187641317b27e1b4060f1552fef0e348ea1b12216d2d2fe327502
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59a02c4a661b5f9af67ada735fa8a7871859bbc9bec13b24943a443d89f55f00
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5024d85f7d5741e2ee48baa326700e94d49b2910a8865857cf6ad58106fa2d05
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3439,6 +3439,84 @@
|
|
| 3439 |
"eval_samples_per_second": 22.704,
|
| 3440 |
"eval_steps_per_second": 5.676,
|
| 3441 |
"step": 22000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3442 |
}
|
| 3443 |
],
|
| 3444 |
"logging_steps": 50,
|
|
@@ -3458,7 +3536,7 @@
|
|
| 3458 |
"attributes": {}
|
| 3459 |
}
|
| 3460 |
},
|
| 3461 |
-
"total_flos": 5.
|
| 3462 |
"train_batch_size": 4,
|
| 3463 |
"trial_name": null,
|
| 3464 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.0815029963850975,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-22500",
|
| 4 |
+
"epoch": 1.8,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 22500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3439 |
"eval_samples_per_second": 22.704,
|
| 3440 |
"eval_steps_per_second": 5.676,
|
| 3441 |
"step": 22000
|
| 3442 |
+
},
|
| 3443 |
+
{
|
| 3444 |
+
"epoch": 1.764,
|
| 3445 |
+
"grad_norm": 0.18784619867801666,
|
| 3446 |
+
"learning_rate": 3.5483999999999998e-06,
|
| 3447 |
+
"loss": 0.0559,
|
| 3448 |
+
"step": 22050
|
| 3449 |
+
},
|
| 3450 |
+
{
|
| 3451 |
+
"epoch": 1.768,
|
| 3452 |
+
"grad_norm": 0.1378210335969925,
|
| 3453 |
+
"learning_rate": 3.4884e-06,
|
| 3454 |
+
"loss": 0.0528,
|
| 3455 |
+
"step": 22100
|
| 3456 |
+
},
|
| 3457 |
+
{
|
| 3458 |
+
"epoch": 1.772,
|
| 3459 |
+
"grad_norm": 0.1482810080051422,
|
| 3460 |
+
"learning_rate": 3.4284e-06,
|
| 3461 |
+
"loss": 0.0535,
|
| 3462 |
+
"step": 22150
|
| 3463 |
+
},
|
| 3464 |
+
{
|
| 3465 |
+
"epoch": 1.776,
|
| 3466 |
+
"grad_norm": 0.2096211314201355,
|
| 3467 |
+
"learning_rate": 3.3684000000000002e-06,
|
| 3468 |
+
"loss": 0.0455,
|
| 3469 |
+
"step": 22200
|
| 3470 |
+
},
|
| 3471 |
+
{
|
| 3472 |
+
"epoch": 1.78,
|
| 3473 |
+
"grad_norm": 0.16895908117294312,
|
| 3474 |
+
"learning_rate": 3.3084000000000004e-06,
|
| 3475 |
+
"loss": 0.0527,
|
| 3476 |
+
"step": 22250
|
| 3477 |
+
},
|
| 3478 |
+
{
|
| 3479 |
+
"epoch": 1.784,
|
| 3480 |
+
"grad_norm": 0.11279798299074173,
|
| 3481 |
+
"learning_rate": 3.2484e-06,
|
| 3482 |
+
"loss": 0.0524,
|
| 3483 |
+
"step": 22300
|
| 3484 |
+
},
|
| 3485 |
+
{
|
| 3486 |
+
"epoch": 1.788,
|
| 3487 |
+
"grad_norm": 0.1444096565246582,
|
| 3488 |
+
"learning_rate": 3.1884000000000003e-06,
|
| 3489 |
+
"loss": 0.0539,
|
| 3490 |
+
"step": 22350
|
| 3491 |
+
},
|
| 3492 |
+
{
|
| 3493 |
+
"epoch": 1.792,
|
| 3494 |
+
"grad_norm": 0.16793648898601532,
|
| 3495 |
+
"learning_rate": 3.1284e-06,
|
| 3496 |
+
"loss": 0.0578,
|
| 3497 |
+
"step": 22400
|
| 3498 |
+
},
|
| 3499 |
+
{
|
| 3500 |
+
"epoch": 1.796,
|
| 3501 |
+
"grad_norm": 0.1865730732679367,
|
| 3502 |
+
"learning_rate": 3.0684e-06,
|
| 3503 |
+
"loss": 0.0538,
|
| 3504 |
+
"step": 22450
|
| 3505 |
+
},
|
| 3506 |
+
{
|
| 3507 |
+
"epoch": 1.8,
|
| 3508 |
+
"grad_norm": 0.11493191868066788,
|
| 3509 |
+
"learning_rate": 3.0084e-06,
|
| 3510 |
+
"loss": 0.0515,
|
| 3511 |
+
"step": 22500
|
| 3512 |
+
},
|
| 3513 |
+
{
|
| 3514 |
+
"epoch": 1.8,
|
| 3515 |
+
"eval_loss": 0.0815029963850975,
|
| 3516 |
+
"eval_runtime": 88.0709,
|
| 3517 |
+
"eval_samples_per_second": 22.709,
|
| 3518 |
+
"eval_steps_per_second": 5.677,
|
| 3519 |
+
"step": 22500
|
| 3520 |
}
|
| 3521 |
],
|
| 3522 |
"logging_steps": 50,
|
|
|
|
| 3536 |
"attributes": {}
|
| 3537 |
}
|
| 3538 |
},
|
| 3539 |
+
"total_flos": 5.48062101504e+16,
|
| 3540 |
"train_batch_size": 4,
|
| 3541 |
"trial_name": null,
|
| 3542 |
"trial_params": null
|