Training in progress, epoch 5, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -50,7 +50,7 @@ model-index:
|
|
| 50 |
type: unknown
|
| 51 |
metrics:
|
| 52 |
- type: cosine_accuracy
|
| 53 |
-
value: 0.
|
| 54 |
name: Cosine Accuracy
|
| 55 |
---
|
| 56 |
|
|
@@ -115,9 +115,9 @@ print(embeddings.shape)
|
|
| 115 |
# Get the similarity scores for the embeddings
|
| 116 |
similarities = model.similarity(embeddings, embeddings)
|
| 117 |
print(similarities)
|
| 118 |
-
# tensor([[1.0000, 0.
|
| 119 |
-
# [0.
|
| 120 |
-
# [0.
|
| 121 |
```
|
| 122 |
|
| 123 |
<!--
|
|
@@ -154,7 +154,7 @@ You can finetune this model on your own dataset.
|
|
| 154 |
|
| 155 |
| Metric | Value |
|
| 156 |
|:--------------------|:-----------|
|
| 157 |
-
| **cosine_accuracy** | **0.
|
| 158 |
|
| 159 |
<!--
|
| 160 |
## Bias, Risks and Limitations
|
|
@@ -364,9 +364,10 @@ You can finetune this model on your own dataset.
|
|
| 364 |
</details>
|
| 365 |
|
| 366 |
### Training Logs
|
| 367 |
-
| Epoch | Step
|
| 368 |
-
|
| 369 |
-
| 4.0 | 9112
|
|
|
|
| 370 |
|
| 371 |
|
| 372 |
### Framework Versions
|
|
|
|
| 50 |
type: unknown
|
| 51 |
metrics:
|
| 52 |
- type: cosine_accuracy
|
| 53 |
+
value: 0.9435034394264221
|
| 54 |
name: Cosine Accuracy
|
| 55 |
---
|
| 56 |
|
|
|
|
| 115 |
# Get the similarity scores for the embeddings
|
| 116 |
similarities = model.similarity(embeddings, embeddings)
|
| 117 |
print(similarities)
|
| 118 |
+
# tensor([[1.0000, 0.6910, 0.3222],
|
| 119 |
+
# [0.6910, 1.0000, 0.3099],
|
| 120 |
+
# [0.3222, 0.3099, 1.0000]])
|
| 121 |
```
|
| 122 |
|
| 123 |
<!--
|
|
|
|
| 154 |
|
| 155 |
| Metric | Value |
|
| 156 |
|:--------------------|:-----------|
|
| 157 |
+
| **cosine_accuracy** | **0.9435** |
|
| 158 |
|
| 159 |
<!--
|
| 160 |
## Bias, Risks and Limitations
|
|
|
|
| 364 |
</details>
|
| 365 |
|
| 366 |
### Training Logs
|
| 367 |
+
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
+
|:-----:|:-----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
+
| 4.0 | 9112 | 1.4316 | 0.7736 | 0.9375 |
|
| 370 |
+
| 5.0 | 11390 | 1.3415 | 0.7541 | 0.9435 |
|
| 371 |
|
| 372 |
|
| 373 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9f3402611e3b3c3f41f95210941eb470fba6ed05452b3849a27bd4dd433f91e
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180607738
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76893f6607e3ebbfe6341fa0102ea836a998daa1b3f6024dda6df1ae75e07865
|
| 3 |
size 180607738
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37aac35e3c58c9053e71545ace22af7302bb0c360af070fa4f0ab6abc890f773
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdd86fc34d6a0f3e0667119a9de9c78b3da8a344f393071c4b8da7ec715e0886
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb1ca70040fe2933c9ba4528086551bb0337a289b19dabf99f736ffc96fdbab
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 5000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -47,6 +47,22 @@
|
|
| 47 |
"eval_samples_per_second": 295.608,
|
| 48 |
"eval_steps_per_second": 2.333,
|
| 49 |
"step": 9112
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
}
|
| 51 |
],
|
| 52 |
"logging_steps": 5000,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 5.0,
|
| 6 |
"eval_steps": 5000,
|
| 7 |
+
"global_step": 11390,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 47 |
"eval_samples_per_second": 295.608,
|
| 48 |
"eval_steps_per_second": 2.333,
|
| 49 |
"step": 9112
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 5.0,
|
| 53 |
+
"grad_norm": 10.955339431762695,
|
| 54 |
+
"learning_rate": 1.0434385002286237e-05,
|
| 55 |
+
"loss": 1.3415,
|
| 56 |
+
"step": 11390
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"epoch": 5.0,
|
| 60 |
+
"eval_cosine_accuracy": 0.9435034394264221,
|
| 61 |
+
"eval_loss": 0.7540939450263977,
|
| 62 |
+
"eval_runtime": 31.893,
|
| 63 |
+
"eval_samples_per_second": 298.028,
|
| 64 |
+
"eval_steps_per_second": 2.352,
|
| 65 |
+
"step": 11390
|
| 66 |
}
|
| 67 |
],
|
| 68 |
"logging_steps": 5000,
|