Training in progress, epoch 2, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -67,7 +67,7 @@ model-index:
|
|
| 67 |
type: validation_eval
|
| 68 |
metrics:
|
| 69 |
- type: cosine_accuracy
|
| 70 |
-
value: 0.
|
| 71 |
name: Cosine Accuracy
|
| 72 |
---
|
| 73 |
|
|
@@ -172,7 +172,7 @@ You can finetune this model on your own dataset.
|
|
| 172 |
|
| 173 |
| Metric | Value |
|
| 174 |
|:--------------------|:-----------|
|
| 175 |
-
| **cosine_accuracy** | **0.
|
| 176 |
|
| 177 |
<!--
|
| 178 |
## Bias, Risks and Limitations
|
|
@@ -404,6 +404,7 @@ You can finetune this model on your own dataset.
|
|
| 404 |
|:-----:|:----:|:-------------:|:-------------------------------:|
|
| 405 |
| None | 0 | - | 0.9376 |
|
| 406 |
| 1.0 | 3374 | 22.5359 | 0.9636 |
|
|
|
|
| 407 |
|
| 408 |
|
| 409 |
### Framework Versions
|
|
|
|
| 67 |
type: validation_eval
|
| 68 |
metrics:
|
| 69 |
- type: cosine_accuracy
|
| 70 |
+
value: 0.9669108986854553
|
| 71 |
name: Cosine Accuracy
|
| 72 |
---
|
| 73 |
|
|
|
|
| 172 |
|
| 173 |
| Metric | Value |
|
| 174 |
|:--------------------|:-----------|
|
| 175 |
+
| **cosine_accuracy** | **0.9669** |
|
| 176 |
|
| 177 |
<!--
|
| 178 |
## Bias, Risks and Limitations
|
|
|
|
| 404 |
|:-----:|:----:|:-------------:|:-------------------------------:|
|
| 405 |
| None | 0 | - | 0.9376 |
|
| 406 |
| 1.0 | 3374 | 22.5359 | 0.9636 |
|
| 407 |
+
| 2.0 | 6748 | 12.387 | 0.9669 |
|
| 408 |
|
| 409 |
|
| 410 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1112197096
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f45b709189f10005c531c512d16fb09ef3d6e63e17ec739dab661444f1c7e1
|
| 3 |
size 1112197096
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1712831051
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d835f6164cae9cfcc79e04be75997ec26b3c808d84e04c121ad4cbb6008cdc6b
|
| 3 |
size 1712831051
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfac7d3bc2b72378c1bf4f5d3aaef89d3e826b5add66b63f70b5a6a7445108f5
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26071fac0a5a2d20a87fc638a2db503999ac83894881340f96948ea63414522f
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a097af662cf910e136a0a6b4d2d74f5cec8c2c42c0a692b8fa0f065b7ea1f65
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/kaggle/working/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -24,6 +24,22 @@
|
|
| 24 |
"eval_steps_per_second": 0.0,
|
| 25 |
"eval_validation_eval_cosine_accuracy": 0.9636484980583191,
|
| 26 |
"step": 3374
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
}
|
| 28 |
],
|
| 29 |
"logging_steps": 500,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 6748,
|
| 3 |
+
"best_metric": 0.9669108986854553,
|
| 4 |
+
"best_model_checkpoint": "/kaggle/working/checkpoint-6748",
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6748,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 24 |
"eval_steps_per_second": 0.0,
|
| 25 |
"eval_validation_eval_cosine_accuracy": 0.9636484980583191,
|
| 26 |
"step": 3374
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 2.0,
|
| 30 |
+
"grad_norm": 177.7242431640625,
|
| 31 |
+
"learning_rate": 1.2150011406441293e-05,
|
| 32 |
+
"loss": 12.387,
|
| 33 |
+
"step": 6748
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 2.0,
|
| 37 |
+
"eval_model_preparation_time": 0.0078,
|
| 38 |
+
"eval_runtime": 142.7381,
|
| 39 |
+
"eval_samples_per_second": 0.0,
|
| 40 |
+
"eval_steps_per_second": 0.0,
|
| 41 |
+
"eval_validation_eval_cosine_accuracy": 0.9669108986854553,
|
| 42 |
+
"step": 6748
|
| 43 |
}
|
| 44 |
],
|
| 45 |
"logging_steps": 500,
|