Training in progress, step 1600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1252,6 +1252,10 @@ You can finetune this model on your own dataset.
|
|
| 1252 |
| 0.0230 | 1300 | 0.7527 |
|
| 1253 |
| 0.0239 | 1350 | 0.6124 |
|
| 1254 |
| 0.0247 | 1400 | 0.6511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1255 |
|
| 1256 |
|
| 1257 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.5831, 0.1091],
|
| 290 |
+
# [0.5831, 1.0000, 0.1491],
|
| 291 |
+
# [0.1091, 0.1491, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1252 |
| 0.0230 | 1300 | 0.7527 |
|
| 1253 |
| 0.0239 | 1350 | 0.6124 |
|
| 1254 |
| 0.0247 | 1400 | 0.6511 |
|
| 1255 |
+
| 0.0256 | 1450 | 0.7047 |
|
| 1256 |
+
| 0.0265 | 1500 | 0.6639 |
|
| 1257 |
+
| 0.0274 | 1550 | 0.6795 |
|
| 1258 |
+
| 0.0283 | 1600 | 0.6341 |
|
| 1259 |
|
| 1260 |
|
| 1261 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61a082ce4f7292aa8d2ccb7ce494bab8f2b629411a594d12d441a6b6b4b0923
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa9774e69968408986a253b355147319deaf91c49e1a0418b0c871e674bc9e7d
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c221931df6dfe32bf8ee57ce8b3fe8867a8a29cca913a459c26e86b456713867
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e4420338904222790575638e536e5d59f25756f58070a9dfc3e73602e7fbef5
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c5f1576da158c64c9c0e76311baa11fecb51a267280f5c39299d29d917bc401
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -204,6 +204,34 @@
|
|
| 204 |
"learning_rate": 1.2358657243816255e-05,
|
| 205 |
"loss": 0.6511,
|
| 206 |
"step": 1400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
}
|
| 208 |
],
|
| 209 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.028273046950928592,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 204 |
"learning_rate": 1.2358657243816255e-05,
|
| 205 |
"loss": 0.6511,
|
| 206 |
"step": 1400
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.025622448799279038,
|
| 210 |
+
"grad_norm": 2.770859956741333,
|
| 211 |
+
"learning_rate": 1.280035335689046e-05,
|
| 212 |
+
"loss": 0.7047,
|
| 213 |
+
"step": 1450
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 0.026505981516495556,
|
| 217 |
+
"grad_norm": 3.188656806945801,
|
| 218 |
+
"learning_rate": 1.3242049469964666e-05,
|
| 219 |
+
"loss": 0.6639,
|
| 220 |
+
"step": 1500
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.027389514233712074,
|
| 224 |
+
"grad_norm": 2.7158899307250977,
|
| 225 |
+
"learning_rate": 1.368374558303887e-05,
|
| 226 |
+
"loss": 0.6795,
|
| 227 |
+
"step": 1550
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 0.028273046950928592,
|
| 231 |
+
"grad_norm": 2.7986080646514893,
|
| 232 |
+
"learning_rate": 1.4125441696113076e-05,
|
| 233 |
+
"loss": 0.6341,
|
| 234 |
+
"step": 1600
|
| 235 |
}
|
| 236 |
],
|
| 237 |
"logging_steps": 50,
|