Training in progress, step 4250, checkpoint
Browse files- last-checkpoint/2_Dense/model.safetensors +1 -1
- last-checkpoint/README.md +44 -19
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +195 -2
last-checkpoint/2_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3149984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a59bfc4cd3767747c580ac670f0d6c48bfe9e402250467b22e693fdfc61b625d
|
| 3 |
size 3149984
|
last-checkpoint/README.md
CHANGED
|
@@ -60,34 +60,34 @@ model-index:
|
|
| 60 |
type: sts_dev
|
| 61 |
metrics:
|
| 62 |
- type: pearson_cosine
|
| 63 |
-
value: 0.
|
| 64 |
name: Pearson Cosine
|
| 65 |
- type: spearman_cosine
|
| 66 |
-
value: 0.
|
| 67 |
name: Spearman Cosine
|
| 68 |
- type: pearson_euclidean
|
| 69 |
-
value: 0.
|
| 70 |
name: Pearson Euclidean
|
| 71 |
- type: spearman_euclidean
|
| 72 |
-
value: 0.
|
| 73 |
name: Spearman Euclidean
|
| 74 |
- type: pearson_manhattan
|
| 75 |
-
value: 0.
|
| 76 |
name: Pearson Manhattan
|
| 77 |
- type: spearman_manhattan
|
| 78 |
-
value: 0.
|
| 79 |
name: Spearman Manhattan
|
| 80 |
- type: pearson_dot
|
| 81 |
-
value: 0.
|
| 82 |
name: Pearson Dot
|
| 83 |
- type: spearman_dot
|
| 84 |
-
value: 0.
|
| 85 |
name: Spearman Dot
|
| 86 |
- type: pearson_max
|
| 87 |
-
value: 0.
|
| 88 |
name: Pearson Max
|
| 89 |
- type: spearman_max
|
| 90 |
-
value: 0.
|
| 91 |
name: Spearman Max
|
| 92 |
---
|
| 93 |
|
|
@@ -191,16 +191,16 @@ You can finetune this model on your own dataset.
|
|
| 191 |
|
| 192 |
| Metric | Value |
|
| 193 |
|:-------------------|:-----------|
|
| 194 |
-
| pearson_cosine | 0.
|
| 195 |
-
| spearman_cosine | 0.
|
| 196 |
-
| pearson_euclidean | 0.
|
| 197 |
-
| spearman_euclidean | 0.
|
| 198 |
-
| pearson_manhattan | 0.
|
| 199 |
| spearman_manhattan | 0.7997 |
|
| 200 |
-
| pearson_dot | 0.
|
| 201 |
-
| spearman_dot | 0.
|
| 202 |
-
| pearson_max | 0.
|
| 203 |
-
| **spearman_max** | **0.
|
| 204 |
|
| 205 |
<!--
|
| 206 |
## Bias, Risks and Limitations
|
|
@@ -805,6 +805,31 @@ You can finetune this model on your own dataset.
|
|
| 805 |
| 9.0455 | 3980 | 0.0638 | - | - |
|
| 806 |
| 9.0683 | 3990 | 0.0625 | - | - |
|
| 807 |
| 9.0911 | 4000 | 0.0665 | 0.0414 | 0.8276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
|
| 809 |
</details>
|
| 810 |
|
|
|
|
| 60 |
type: sts_dev
|
| 61 |
metrics:
|
| 62 |
- type: pearson_cosine
|
| 63 |
+
value: 0.8220874775898197
|
| 64 |
name: Pearson Cosine
|
| 65 |
- type: spearman_cosine
|
| 66 |
+
value: 0.8282368218808581
|
| 67 |
name: Spearman Cosine
|
| 68 |
- type: pearson_euclidean
|
| 69 |
+
value: 0.7929031352092236
|
| 70 |
name: Pearson Euclidean
|
| 71 |
- type: spearman_euclidean
|
| 72 |
+
value: 0.7979913252239026
|
| 73 |
name: Spearman Euclidean
|
| 74 |
- type: pearson_manhattan
|
| 75 |
+
value: 0.7936882861676204
|
| 76 |
name: Pearson Manhattan
|
| 77 |
- type: spearman_manhattan
|
| 78 |
+
value: 0.7996541111809876
|
| 79 |
name: Spearman Manhattan
|
| 80 |
- type: pearson_dot
|
| 81 |
+
value: 0.7010536213435227
|
| 82 |
name: Pearson Dot
|
| 83 |
- type: spearman_dot
|
| 84 |
+
value: 0.6844746263331734
|
| 85 |
name: Spearman Dot
|
| 86 |
- type: pearson_max
|
| 87 |
+
value: 0.8220874775898197
|
| 88 |
name: Pearson Max
|
| 89 |
- type: spearman_max
|
| 90 |
+
value: 0.8282368218808581
|
| 91 |
name: Spearman Max
|
| 92 |
---
|
| 93 |
|
|
|
|
| 191 |
|
| 192 |
| Metric | Value |
|
| 193 |
|:-------------------|:-----------|
|
| 194 |
+
| pearson_cosine | 0.8221 |
|
| 195 |
+
| spearman_cosine | 0.8282 |
|
| 196 |
+
| pearson_euclidean | 0.7929 |
|
| 197 |
+
| spearman_euclidean | 0.798 |
|
| 198 |
+
| pearson_manhattan | 0.7937 |
|
| 199 |
| spearman_manhattan | 0.7997 |
|
| 200 |
+
| pearson_dot | 0.7011 |
|
| 201 |
+
| spearman_dot | 0.6845 |
|
| 202 |
+
| pearson_max | 0.8221 |
|
| 203 |
+
| **spearman_max** | **0.8282** |
|
| 204 |
|
| 205 |
<!--
|
| 206 |
## Bias, Risks and Limitations
|
|
|
|
| 805 |
| 9.0455 | 3980 | 0.0638 | - | - |
|
| 806 |
| 9.0683 | 3990 | 0.0625 | - | - |
|
| 807 |
| 9.0911 | 4000 | 0.0665 | 0.0414 | 0.8276 |
|
| 808 |
+
| 9.1138 | 4010 | 0.0624 | - | - |
|
| 809 |
+
| 9.1366 | 4020 | 0.0621 | - | - |
|
| 810 |
+
| 9.1593 | 4030 | 0.0648 | - | - |
|
| 811 |
+
| 9.1821 | 4040 | 0.0622 | - | - |
|
| 812 |
+
| 9.2049 | 4050 | 0.0635 | - | - |
|
| 813 |
+
| 9.2276 | 4060 | 0.061 | - | - |
|
| 814 |
+
| 9.2504 | 4070 | 0.0602 | - | - |
|
| 815 |
+
| 9.2732 | 4080 | 0.0613 | - | - |
|
| 816 |
+
| 9.2959 | 4090 | 0.0604 | - | - |
|
| 817 |
+
| 9.3187 | 4100 | 0.0623 | - | - |
|
| 818 |
+
| 9.3414 | 4110 | 0.0641 | - | - |
|
| 819 |
+
| 9.3642 | 4120 | 0.0635 | - | - |
|
| 820 |
+
| 9.3870 | 4130 | 0.0608 | - | - |
|
| 821 |
+
| 9.4097 | 4140 | 0.0611 | - | - |
|
| 822 |
+
| 9.4325 | 4150 | 0.0607 | - | - |
|
| 823 |
+
| 9.4553 | 4160 | 0.0631 | - | - |
|
| 824 |
+
| 9.4780 | 4170 | 0.0618 | - | - |
|
| 825 |
+
| 9.5008 | 4180 | 0.0609 | - | - |
|
| 826 |
+
| 9.5235 | 4190 | 0.0613 | - | - |
|
| 827 |
+
| 9.5463 | 4200 | 0.0606 | - | - |
|
| 828 |
+
| 9.5691 | 4210 | 0.0595 | - | - |
|
| 829 |
+
| 9.5918 | 4220 | 0.0609 | - | - |
|
| 830 |
+
| 9.6146 | 4230 | 0.061 | - | - |
|
| 831 |
+
| 9.6374 | 4240 | 0.0616 | - | - |
|
| 832 |
+
| 9.6601 | 4250 | 0.0613 | 0.0418 | 0.8282 |
|
| 833 |
|
| 834 |
</details>
|
| 835 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735216376
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66daefb719ad12215c08363cf07f604053315b28142583dcc866c834327eca3f
|
| 3 |
size 735216376
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1476823354
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:796b01c86922133da7b4702097cf156006e03e00f92d857ba3d2713e738810f2
|
| 3 |
size 1476823354
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a734f96fdbf1b2b95f5a896a45ac06db48cebeba2dcddafafaf5c42500c1f8ba
|
| 3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4179c3a5721b96913d5982f5899f5a8134fa075bf224efaaeb574cd846c07bbf
|
| 3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55f5c2af0a83fa2c2de4c1c2429806c3814277f1d706282352eeb894c157a06f
|
| 3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cca4516c2bb67a2a1691e38c770742a680a94828f839610d2ffa43419db4feba
|
| 3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cb45b1fc4043ea836f442423485d57eb9667bd00787e4c2417e1a25ab32a480
|
| 3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ec528339d849d7328578e52ee72da1edaa069275122e1908976fd336632067e
|
| 3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18f45c589b9a8c923ac9908849cfe569a36e99bfb6aaf6913e76e736935b42a2
|
| 3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7571acf39c17540211b353a65ed07e95044bb1a68001f53b77c1f7bb674917b
|
| 3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:451fe1a5f62f2f6eed0b67a70a5f8f0f813e8a38e58c106c948a6c2c9e79f8ef
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3095,6 +3095,199 @@
|
|
| 3095 |
"eval_sts_dev_spearman_manhattan": 0.7997185742063436,
|
| 3096 |
"eval_sts_dev_spearman_max": 0.8276471334482826,
|
| 3097 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3098 |
}
|
| 3099 |
],
|
| 3100 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.660122350263196,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 4250,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3095 |
"eval_sts_dev_spearman_manhattan": 0.7997185742063436,
|
| 3096 |
"eval_sts_dev_spearman_max": 0.8276471334482826,
|
| 3097 |
"step": 4000
|
| 3098 |
+
},
|
| 3099 |
+
{
|
| 3100 |
+
"epoch": 9.11381419832124,
|
| 3101 |
+
"grad_norm": 0.23453885316848755,
|
| 3102 |
+
"learning_rate": 2.852164017212561e-06,
|
| 3103 |
+
"loss": 0.0624,
|
| 3104 |
+
"step": 4010
|
| 3105 |
+
},
|
| 3106 |
+
{
|
| 3107 |
+
"epoch": 9.13657703798549,
|
| 3108 |
+
"grad_norm": 0.22881363332271576,
|
| 3109 |
+
"learning_rate": 2.859276645684413e-06,
|
| 3110 |
+
"loss": 0.0621,
|
| 3111 |
+
"step": 4020
|
| 3112 |
+
},
|
| 3113 |
+
{
|
| 3114 |
+
"epoch": 9.159339877649737,
|
| 3115 |
+
"grad_norm": 0.21634767949581146,
|
| 3116 |
+
"learning_rate": 2.866389274156265e-06,
|
| 3117 |
+
"loss": 0.0648,
|
| 3118 |
+
"step": 4030
|
| 3119 |
+
},
|
| 3120 |
+
{
|
| 3121 |
+
"epoch": 9.182102717313985,
|
| 3122 |
+
"grad_norm": 0.2653968334197998,
|
| 3123 |
+
"learning_rate": 2.8735019026281164e-06,
|
| 3124 |
+
"loss": 0.0622,
|
| 3125 |
+
"step": 4040
|
| 3126 |
+
},
|
| 3127 |
+
{
|
| 3128 |
+
"epoch": 9.204865556978232,
|
| 3129 |
+
"grad_norm": 0.2806706726551056,
|
| 3130 |
+
"learning_rate": 2.8806145310999684e-06,
|
| 3131 |
+
"loss": 0.0635,
|
| 3132 |
+
"step": 4050
|
| 3133 |
+
},
|
| 3134 |
+
{
|
| 3135 |
+
"epoch": 9.227628396642482,
|
| 3136 |
+
"grad_norm": 0.25029635429382324,
|
| 3137 |
+
"learning_rate": 2.88772715957182e-06,
|
| 3138 |
+
"loss": 0.061,
|
| 3139 |
+
"step": 4060
|
| 3140 |
+
},
|
| 3141 |
+
{
|
| 3142 |
+
"epoch": 9.25039123630673,
|
| 3143 |
+
"grad_norm": 0.24983397126197815,
|
| 3144 |
+
"learning_rate": 2.894839788043672e-06,
|
| 3145 |
+
"loss": 0.0602,
|
| 3146 |
+
"step": 4070
|
| 3147 |
+
},
|
| 3148 |
+
{
|
| 3149 |
+
"epoch": 9.273154075970977,
|
| 3150 |
+
"grad_norm": 0.21316730976104736,
|
| 3151 |
+
"learning_rate": 2.9019524165155234e-06,
|
| 3152 |
+
"loss": 0.0613,
|
| 3153 |
+
"step": 4080
|
| 3154 |
+
},
|
| 3155 |
+
{
|
| 3156 |
+
"epoch": 9.295916915635225,
|
| 3157 |
+
"grad_norm": 0.21870028972625732,
|
| 3158 |
+
"learning_rate": 2.9090650449873754e-06,
|
| 3159 |
+
"loss": 0.0604,
|
| 3160 |
+
"step": 4090
|
| 3161 |
+
},
|
| 3162 |
+
{
|
| 3163 |
+
"epoch": 9.318679755299474,
|
| 3164 |
+
"grad_norm": 0.21702495217323303,
|
| 3165 |
+
"learning_rate": 2.9161776734592273e-06,
|
| 3166 |
+
"loss": 0.0623,
|
| 3167 |
+
"step": 4100
|
| 3168 |
+
},
|
| 3169 |
+
{
|
| 3170 |
+
"epoch": 9.341442594963722,
|
| 3171 |
+
"grad_norm": 0.22777798771858215,
|
| 3172 |
+
"learning_rate": 2.923290301931079e-06,
|
| 3173 |
+
"loss": 0.0641,
|
| 3174 |
+
"step": 4110
|
| 3175 |
+
},
|
| 3176 |
+
{
|
| 3177 |
+
"epoch": 9.36420543462797,
|
| 3178 |
+
"grad_norm": 0.2656283378601074,
|
| 3179 |
+
"learning_rate": 2.930402930402931e-06,
|
| 3180 |
+
"loss": 0.0635,
|
| 3181 |
+
"step": 4120
|
| 3182 |
+
},
|
| 3183 |
+
{
|
| 3184 |
+
"epoch": 9.386968274292219,
|
| 3185 |
+
"grad_norm": 0.23527038097381592,
|
| 3186 |
+
"learning_rate": 2.9375155588747823e-06,
|
| 3187 |
+
"loss": 0.0608,
|
| 3188 |
+
"step": 4130
|
| 3189 |
+
},
|
| 3190 |
+
{
|
| 3191 |
+
"epoch": 9.409731113956466,
|
| 3192 |
+
"grad_norm": 0.21856476366519928,
|
| 3193 |
+
"learning_rate": 2.9446281873466343e-06,
|
| 3194 |
+
"loss": 0.0611,
|
| 3195 |
+
"step": 4140
|
| 3196 |
+
},
|
| 3197 |
+
{
|
| 3198 |
+
"epoch": 9.432493953620714,
|
| 3199 |
+
"grad_norm": 0.23688729107379913,
|
| 3200 |
+
"learning_rate": 2.951740815818486e-06,
|
| 3201 |
+
"loss": 0.0607,
|
| 3202 |
+
"step": 4150
|
| 3203 |
+
},
|
| 3204 |
+
{
|
| 3205 |
+
"epoch": 9.455256793284962,
|
| 3206 |
+
"grad_norm": 0.26457446813583374,
|
| 3207 |
+
"learning_rate": 2.9588534442903377e-06,
|
| 3208 |
+
"loss": 0.0631,
|
| 3209 |
+
"step": 4160
|
| 3210 |
+
},
|
| 3211 |
+
{
|
| 3212 |
+
"epoch": 9.478019632949211,
|
| 3213 |
+
"grad_norm": 0.31578782200813293,
|
| 3214 |
+
"learning_rate": 2.9659660727621897e-06,
|
| 3215 |
+
"loss": 0.0618,
|
| 3216 |
+
"step": 4170
|
| 3217 |
+
},
|
| 3218 |
+
{
|
| 3219 |
+
"epoch": 9.500782472613459,
|
| 3220 |
+
"grad_norm": 0.23187491297721863,
|
| 3221 |
+
"learning_rate": 2.9730787012340412e-06,
|
| 3222 |
+
"loss": 0.0609,
|
| 3223 |
+
"step": 4180
|
| 3224 |
+
},
|
| 3225 |
+
{
|
| 3226 |
+
"epoch": 9.523545312277706,
|
| 3227 |
+
"grad_norm": 0.24577929079532623,
|
| 3228 |
+
"learning_rate": 2.980191329705893e-06,
|
| 3229 |
+
"loss": 0.0613,
|
| 3230 |
+
"step": 4190
|
| 3231 |
+
},
|
| 3232 |
+
{
|
| 3233 |
+
"epoch": 9.546308151941954,
|
| 3234 |
+
"grad_norm": 0.23201169073581696,
|
| 3235 |
+
"learning_rate": 2.9873039581777447e-06,
|
| 3236 |
+
"loss": 0.0606,
|
| 3237 |
+
"step": 4200
|
| 3238 |
+
},
|
| 3239 |
+
{
|
| 3240 |
+
"epoch": 9.569070991606203,
|
| 3241 |
+
"grad_norm": 0.2860512137413025,
|
| 3242 |
+
"learning_rate": 2.9944165866495967e-06,
|
| 3243 |
+
"loss": 0.0595,
|
| 3244 |
+
"step": 4210
|
| 3245 |
+
},
|
| 3246 |
+
{
|
| 3247 |
+
"epoch": 9.591833831270451,
|
| 3248 |
+
"grad_norm": 0.237753763794899,
|
| 3249 |
+
"learning_rate": 3.001529215121448e-06,
|
| 3250 |
+
"loss": 0.0609,
|
| 3251 |
+
"step": 4220
|
| 3252 |
+
},
|
| 3253 |
+
{
|
| 3254 |
+
"epoch": 9.614596670934699,
|
| 3255 |
+
"grad_norm": 0.23422682285308838,
|
| 3256 |
+
"learning_rate": 3.0086418435933e-06,
|
| 3257 |
+
"loss": 0.061,
|
| 3258 |
+
"step": 4230
|
| 3259 |
+
},
|
| 3260 |
+
{
|
| 3261 |
+
"epoch": 9.637359510598948,
|
| 3262 |
+
"grad_norm": 0.2497267723083496,
|
| 3263 |
+
"learning_rate": 3.015754472065152e-06,
|
| 3264 |
+
"loss": 0.0616,
|
| 3265 |
+
"step": 4240
|
| 3266 |
+
},
|
| 3267 |
+
{
|
| 3268 |
+
"epoch": 9.660122350263196,
|
| 3269 |
+
"grad_norm": 0.2505936622619629,
|
| 3270 |
+
"learning_rate": 3.0228671005370036e-06,
|
| 3271 |
+
"loss": 0.0613,
|
| 3272 |
+
"step": 4250
|
| 3273 |
+
},
|
| 3274 |
+
{
|
| 3275 |
+
"epoch": 9.660122350263196,
|
| 3276 |
+
"eval_loss": 0.04175787419080734,
|
| 3277 |
+
"eval_runtime": 3.1427,
|
| 3278 |
+
"eval_samples_per_second": 477.3,
|
| 3279 |
+
"eval_steps_per_second": 7.637,
|
| 3280 |
+
"eval_sts_dev_pearson_cosine": 0.8220874775898197,
|
| 3281 |
+
"eval_sts_dev_pearson_dot": 0.7010536213435227,
|
| 3282 |
+
"eval_sts_dev_pearson_euclidean": 0.7929031352092236,
|
| 3283 |
+
"eval_sts_dev_pearson_manhattan": 0.7936882861676204,
|
| 3284 |
+
"eval_sts_dev_pearson_max": 0.8220874775898197,
|
| 3285 |
+
"eval_sts_dev_spearman_cosine": 0.8282368218808581,
|
| 3286 |
+
"eval_sts_dev_spearman_dot": 0.6844746263331734,
|
| 3287 |
+
"eval_sts_dev_spearman_euclidean": 0.7979913252239026,
|
| 3288 |
+
"eval_sts_dev_spearman_manhattan": 0.7996541111809876,
|
| 3289 |
+
"eval_sts_dev_spearman_max": 0.8282368218808581,
|
| 3290 |
+
"step": 4250
|
| 3291 |
}
|
| 3292 |
],
|
| 3293 |
"logging_steps": 10,
|