Training in progress, step 1400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1248,6 +1248,10 @@ You can finetune this model on your own dataset.
|
|
| 1248 |
| 0.0194 | 1100 | 0.6169 |
|
| 1249 |
| 0.0203 | 1150 | 0.6367 |
|
| 1250 |
| 0.0212 | 1200 | 0.615 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1251 |
|
| 1252 |
|
| 1253 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6098, 0.1159],
|
| 290 |
+
# [0.6098, 1.0000, 0.1612],
|
| 291 |
+
# [0.1159, 0.1612, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1248 |
| 0.0194 | 1100 | 0.6169 |
|
| 1249 |
| 0.0203 | 1150 | 0.6367 |
|
| 1250 |
| 0.0212 | 1200 | 0.615 |
|
| 1251 |
+
| 0.0221 | 1250 | 0.6869 |
|
| 1252 |
+
| 0.0230 | 1300 | 0.7527 |
|
| 1253 |
+
| 0.0239 | 1350 | 0.6124 |
|
| 1254 |
+
| 0.0247 | 1400 | 0.6511 |
|
| 1255 |
|
| 1256 |
|
| 1257 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50f82f7a3e99ac380e6c75e094e8a65af18eab74684e14810366f1f357e41d60
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:075842c04a113f74618952773066d4169a59d801ccaeebc81d0b3d4f339980ae
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e01322e389b4b8eb7d66bcbb01e77ee7865373b24e1202361d2a597222b56d92
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3b03c8c40e45be48b5956b82e1abb2c1ec5641f4fc77e6fdec0942a77964500
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f365945c29bbfef6ade3ff30db3a8d39cad690fb183ff7034a93fcc76535d32d
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -176,6 +176,34 @@
|
|
| 176 |
"learning_rate": 1.0591872791519434e-05,
|
| 177 |
"loss": 0.615,
|
| 178 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
}
|
| 180 |
],
|
| 181 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.02473891608206252,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 176 |
"learning_rate": 1.0591872791519434e-05,
|
| 177 |
"loss": 0.615,
|
| 178 |
"step": 1200
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 0.02208831793041296,
|
| 182 |
+
"grad_norm": 3.260239601135254,
|
| 183 |
+
"learning_rate": 1.103356890459364e-05,
|
| 184 |
+
"loss": 0.6869,
|
| 185 |
+
"step": 1250
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 0.022971850647629483,
|
| 189 |
+
"grad_norm": 2.485383987426758,
|
| 190 |
+
"learning_rate": 1.1475265017667845e-05,
|
| 191 |
+
"loss": 0.7527,
|
| 192 |
+
"step": 1300
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 0.023855383364846,
|
| 196 |
+
"grad_norm": 2.26680326461792,
|
| 197 |
+
"learning_rate": 1.191696113074205e-05,
|
| 198 |
+
"loss": 0.6124,
|
| 199 |
+
"step": 1350
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.02473891608206252,
|
| 203 |
+
"grad_norm": 2.348688840866089,
|
| 204 |
+
"learning_rate": 1.2358657243816255e-05,
|
| 205 |
+
"loss": 0.6511,
|
| 206 |
+
"step": 1400
|
| 207 |
}
|
| 208 |
],
|
| 209 |
"logging_steps": 50,
|