Training in progress, step 4250, checkpoint

Browse files

Files changed (14) hide show

last-checkpoint/2_Dense/model.safetensors +1 -1
last-checkpoint/README.md +44 -19
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +195 -2

last-checkpoint/2_Dense/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7fba931953e737082f85cb7f165d1d9d36de76853c1f92e1adb93636e064009
 size 3149984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a59bfc4cd3767747c580ac670f0d6c48bfe9e402250467b22e693fdfc61b625d
 size 3149984

last-checkpoint/README.md CHANGED Viewed

@@ -60,34 +60,34 @@ model-index:
       type: sts_dev
     metrics:
     - type: pearson_cosine
-      value: 0.8220285778407846
       name: Pearson Cosine
     - type: spearman_cosine
-      value: 0.8276471334482826
       name: Spearman Cosine
     - type: pearson_euclidean
-      value: 0.7933532583617332
       name: Pearson Euclidean
     - type: spearman_euclidean
-      value: 0.7981249234213611
       name: Spearman Euclidean
     - type: pearson_manhattan
-      value: 0.7941338912825391
       name: Pearson Manhattan
     - type: spearman_manhattan
-      value: 0.7997185742063436
       name: Spearman Manhattan
     - type: pearson_dot
-      value: 0.7022254885739367
       name: Pearson Dot
     - type: spearman_dot
-      value: 0.6857559655167198
       name: Spearman Dot
     - type: pearson_max
-      value: 0.8220285778407846
       name: Pearson Max
     - type: spearman_max
-      value: 0.8276471334482826
       name: Spearman Max
 ---
@@ -191,16 +191,16 @@ You can finetune this model on your own dataset.
 | Metric             | Value      |
 |:-------------------|:-----------|
-| pearson_cosine     | 0.822      |
-| spearman_cosine    | 0.8276     |
-| pearson_euclidean  | 0.7934     |
-| spearman_euclidean | 0.7981     |
-| pearson_manhattan  | 0.7941     |
 | spearman_manhattan | 0.7997     |
-| pearson_dot        | 0.7022     |
-| spearman_dot       | 0.6858     |
-| pearson_max        | 0.822      |
-| **spearman_max**   | **0.8276** |
 <!--
 ## Bias, Risks and Limitations
@@ -805,6 +805,31 @@ You can finetune this model on your own dataset.
 | 9.0455 | 3980 | 0.0638        | -               | -                    |
 | 9.0683 | 3990 | 0.0625        | -               | -                    |
 | 9.0911 | 4000 | 0.0665        | 0.0414          | 0.8276               |
 </details>

       type: sts_dev
     metrics:
     - type: pearson_cosine
+      value: 0.8220874775898197
       name: Pearson Cosine
     - type: spearman_cosine
+      value: 0.8282368218808581
       name: Spearman Cosine
     - type: pearson_euclidean
+      value: 0.7929031352092236
       name: Pearson Euclidean
     - type: spearman_euclidean
+      value: 0.7979913252239026
       name: Spearman Euclidean
     - type: pearson_manhattan
+      value: 0.7936882861676204
       name: Pearson Manhattan
     - type: spearman_manhattan
+      value: 0.7996541111809876
       name: Spearman Manhattan
     - type: pearson_dot
+      value: 0.7010536213435227
       name: Pearson Dot
     - type: spearman_dot
+      value: 0.6844746263331734
       name: Spearman Dot
     - type: pearson_max
+      value: 0.8220874775898197
       name: Pearson Max
     - type: spearman_max
+      value: 0.8282368218808581
       name: Spearman Max
 ---
 | Metric             | Value      |
 |:-------------------|:-----------|
+| pearson_cosine     | 0.8221     |
+| spearman_cosine    | 0.8282     |
+| pearson_euclidean  | 0.7929     |
+| spearman_euclidean | 0.798      |
+| pearson_manhattan  | 0.7937     |
 | spearman_manhattan | 0.7997     |
+| pearson_dot        | 0.7011     |
+| spearman_dot       | 0.6845     |
+| pearson_max        | 0.8221     |
+| **spearman_max**   | **0.8282** |
 <!--
 ## Bias, Risks and Limitations
 | 9.0455 | 3980 | 0.0638        | -               | -                    |
 | 9.0683 | 3990 | 0.0625        | -               | -                    |
 | 9.0911 | 4000 | 0.0665        | 0.0414          | 0.8276               |
+| 9.1138 | 4010 | 0.0624        | -               | -                    |
+| 9.1366 | 4020 | 0.0621        | -               | -                    |
+| 9.1593 | 4030 | 0.0648        | -               | -                    |
+| 9.1821 | 4040 | 0.0622        | -               | -                    |
+| 9.2049 | 4050 | 0.0635        | -               | -                    |
+| 9.2276 | 4060 | 0.061         | -               | -                    |
+| 9.2504 | 4070 | 0.0602        | -               | -                    |
+| 9.2732 | 4080 | 0.0613        | -               | -                    |
+| 9.2959 | 4090 | 0.0604        | -               | -                    |
+| 9.3187 | 4100 | 0.0623        | -               | -                    |
+| 9.3414 | 4110 | 0.0641        | -               | -                    |
+| 9.3642 | 4120 | 0.0635        | -               | -                    |
+| 9.3870 | 4130 | 0.0608        | -               | -                    |
+| 9.4097 | 4140 | 0.0611        | -               | -                    |
+| 9.4325 | 4150 | 0.0607        | -               | -                    |
+| 9.4553 | 4160 | 0.0631        | -               | -                    |
+| 9.4780 | 4170 | 0.0618        | -               | -                    |
+| 9.5008 | 4180 | 0.0609        | -               | -                    |
+| 9.5235 | 4190 | 0.0613        | -               | -                    |
+| 9.5463 | 4200 | 0.0606        | -               | -                    |
+| 9.5691 | 4210 | 0.0595        | -               | -                    |
+| 9.5918 | 4220 | 0.0609        | -               | -                    |
+| 9.6146 | 4230 | 0.061         | -               | -                    |
+| 9.6374 | 4240 | 0.0616        | -               | -                    |
+| 9.6601 | 4250 | 0.0613        | 0.0418          | 0.8282               |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca3c3bf8f87beab47d5a88d31e1b9bc66bde4d8a6aa0a7db8a8e23683e25777e
 size 735216376

 version https://git-lfs.github.com/spec/v1
+oid sha256:66daefb719ad12215c08363cf07f604053315b28142583dcc866c834327eca3f
 size 735216376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef993bcc74f9e91de236fcfb2956ae2eacb056d0348cc87b69e981352b953f7d
 size 1476823354

 version https://git-lfs.github.com/spec/v1
+oid sha256:796b01c86922133da7b4702097cf156006e03e00f92d857ba3d2713e738810f2
 size 1476823354

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:371c835359601369002ac0df9d2e47a8e77df500ebbf0208a4d9c71218241989
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:a734f96fdbf1b2b95f5a896a45ac06db48cebeba2dcddafafaf5c42500c1f8ba
 size 15920

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f24f7823ebb746566b89158a8e9007cbf065595314e547da3f5b253f5d6fb74
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:4179c3a5721b96913d5982f5899f5a8134fa075bf224efaaeb574cd846c07bbf
 size 15920

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37bb3351ac8c7870230a6937af6398f88591c9eb1caf43b5149e37c552a570b2
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:55f5c2af0a83fa2c2de4c1c2429806c3814277f1d706282352eeb894c157a06f
 size 15920

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eff1d37220ad38a5d635b06245abf11ad851da51e75384cbc4aa9966c0fa2932
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:cca4516c2bb67a2a1691e38c770742a680a94828f839610d2ffa43419db4feba
 size 15920

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b080d04a24d759d6724428633587a334c9ccd3796f670ea12513f73e504bc81
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cb45b1fc4043ea836f442423485d57eb9667bd00787e4c2417e1a25ab32a480
 size 15920

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5688abb5f99504d4856749cc7227131c7fc12204ecd8d80bc25dac63e630d98
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ec528339d849d7328578e52ee72da1edaa069275122e1908976fd336632067e
 size 15920

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a60b5c4cf8021e8b74d32d312519584aafee70c666b95e3799a07697027695ec
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:18f45c589b9a8c923ac9908849cfe569a36e99bfb6aaf6913e76e736935b42a2
 size 15920

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3fc79bb0d7d88c4bdb60e9b0f3298f97d9fbd1739fa04dd76de01b87a52b25a
 size 15920

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7571acf39c17540211b353a65ed07e95044bb1a68001f53b77c1f7bb674917b
 size 15920

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4789e0b1853e2658dba3c227017d8d2b1699b26df401842a8229cea3d0a058ea
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:451fe1a5f62f2f6eed0b67a70a5f8f0f813e8a38e58c106c948a6c2c9e79f8ef
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.091051358656992,
   "eval_steps": 250,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3095,6 +3095,199 @@
       "eval_sts_dev_spearman_manhattan": 0.7997185742063436,
       "eval_sts_dev_spearman_max": 0.8276471334482826,
       "step": 4000
     }
   ],
   "logging_steps": 10,

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.660122350263196,
   "eval_steps": 250,
+  "global_step": 4250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_sts_dev_spearman_manhattan": 0.7997185742063436,
       "eval_sts_dev_spearman_max": 0.8276471334482826,
       "step": 4000
+    },
+    {
+      "epoch": 9.11381419832124,
+      "grad_norm": 0.23453885316848755,
+      "learning_rate": 2.852164017212561e-06,
+      "loss": 0.0624,
+      "step": 4010
+    },
+    {
+      "epoch": 9.13657703798549,
+      "grad_norm": 0.22881363332271576,
+      "learning_rate": 2.859276645684413e-06,
+      "loss": 0.0621,
+      "step": 4020
+    },
+    {
+      "epoch": 9.159339877649737,
+      "grad_norm": 0.21634767949581146,
+      "learning_rate": 2.866389274156265e-06,
+      "loss": 0.0648,
+      "step": 4030
+    },
+    {
+      "epoch": 9.182102717313985,
+      "grad_norm": 0.2653968334197998,
+      "learning_rate": 2.8735019026281164e-06,
+      "loss": 0.0622,
+      "step": 4040
+    },
+    {
+      "epoch": 9.204865556978232,
+      "grad_norm": 0.2806706726551056,
+      "learning_rate": 2.8806145310999684e-06,
+      "loss": 0.0635,
+      "step": 4050
+    },
+    {
+      "epoch": 9.227628396642482,
+      "grad_norm": 0.25029635429382324,
+      "learning_rate": 2.88772715957182e-06,
+      "loss": 0.061,
+      "step": 4060
+    },
+    {
+      "epoch": 9.25039123630673,
+      "grad_norm": 0.24983397126197815,
+      "learning_rate": 2.894839788043672e-06,
+      "loss": 0.0602,
+      "step": 4070
+    },
+    {
+      "epoch": 9.273154075970977,
+      "grad_norm": 0.21316730976104736,
+      "learning_rate": 2.9019524165155234e-06,
+      "loss": 0.0613,
+      "step": 4080
+    },
+    {
+      "epoch": 9.295916915635225,
+      "grad_norm": 0.21870028972625732,
+      "learning_rate": 2.9090650449873754e-06,
+      "loss": 0.0604,
+      "step": 4090
+    },
+    {
+      "epoch": 9.318679755299474,
+      "grad_norm": 0.21702495217323303,
+      "learning_rate": 2.9161776734592273e-06,
+      "loss": 0.0623,
+      "step": 4100
+    },
+    {
+      "epoch": 9.341442594963722,
+      "grad_norm": 0.22777798771858215,
+      "learning_rate": 2.923290301931079e-06,
+      "loss": 0.0641,
+      "step": 4110
+    },
+    {
+      "epoch": 9.36420543462797,
+      "grad_norm": 0.2656283378601074,
+      "learning_rate": 2.930402930402931e-06,
+      "loss": 0.0635,
+      "step": 4120
+    },
+    {
+      "epoch": 9.386968274292219,
+      "grad_norm": 0.23527038097381592,
+      "learning_rate": 2.9375155588747823e-06,
+      "loss": 0.0608,
+      "step": 4130
+    },
+    {
+      "epoch": 9.409731113956466,
+      "grad_norm": 0.21856476366519928,
+      "learning_rate": 2.9446281873466343e-06,
+      "loss": 0.0611,
+      "step": 4140
+    },
+    {
+      "epoch": 9.432493953620714,
+      "grad_norm": 0.23688729107379913,
+      "learning_rate": 2.951740815818486e-06,
+      "loss": 0.0607,
+      "step": 4150
+    },
+    {
+      "epoch": 9.455256793284962,
+      "grad_norm": 0.26457446813583374,
+      "learning_rate": 2.9588534442903377e-06,
+      "loss": 0.0631,
+      "step": 4160
+    },
+    {
+      "epoch": 9.478019632949211,
+      "grad_norm": 0.31578782200813293,
+      "learning_rate": 2.9659660727621897e-06,
+      "loss": 0.0618,
+      "step": 4170
+    },
+    {
+      "epoch": 9.500782472613459,
+      "grad_norm": 0.23187491297721863,
+      "learning_rate": 2.9730787012340412e-06,
+      "loss": 0.0609,
+      "step": 4180
+    },
+    {
+      "epoch": 9.523545312277706,
+      "grad_norm": 0.24577929079532623,
+      "learning_rate": 2.980191329705893e-06,
+      "loss": 0.0613,
+      "step": 4190
+    },
+    {
+      "epoch": 9.546308151941954,
+      "grad_norm": 0.23201169073581696,
+      "learning_rate": 2.9873039581777447e-06,
+      "loss": 0.0606,
+      "step": 4200
+    },
+    {
+      "epoch": 9.569070991606203,
+      "grad_norm": 0.2860512137413025,
+      "learning_rate": 2.9944165866495967e-06,
+      "loss": 0.0595,
+      "step": 4210
+    },
+    {
+      "epoch": 9.591833831270451,
+      "grad_norm": 0.237753763794899,
+      "learning_rate": 3.001529215121448e-06,
+      "loss": 0.0609,
+      "step": 4220
+    },
+    {
+      "epoch": 9.614596670934699,
+      "grad_norm": 0.23422682285308838,
+      "learning_rate": 3.0086418435933e-06,
+      "loss": 0.061,
+      "step": 4230
+    },
+    {
+      "epoch": 9.637359510598948,
+      "grad_norm": 0.2497267723083496,
+      "learning_rate": 3.015754472065152e-06,
+      "loss": 0.0616,
+      "step": 4240
+    },
+    {
+      "epoch": 9.660122350263196,
+      "grad_norm": 0.2505936622619629,
+      "learning_rate": 3.0228671005370036e-06,
+      "loss": 0.0613,
+      "step": 4250
+    },
+    {
+      "epoch": 9.660122350263196,
+      "eval_loss": 0.04175787419080734,
+      "eval_runtime": 3.1427,
+      "eval_samples_per_second": 477.3,
+      "eval_steps_per_second": 7.637,
+      "eval_sts_dev_pearson_cosine": 0.8220874775898197,
+      "eval_sts_dev_pearson_dot": 0.7010536213435227,
+      "eval_sts_dev_pearson_euclidean": 0.7929031352092236,
+      "eval_sts_dev_pearson_manhattan": 0.7936882861676204,
+      "eval_sts_dev_pearson_max": 0.8220874775898197,
+      "eval_sts_dev_spearman_cosine": 0.8282368218808581,
+      "eval_sts_dev_spearman_dot": 0.6844746263331734,
+      "eval_sts_dev_spearman_euclidean": 0.7979913252239026,
+      "eval_sts_dev_spearman_manhattan": 0.7996541111809876,
+      "eval_sts_dev_spearman_max": 0.8282368218808581,
+      "step": 4250
     }
   ],
   "logging_steps": 10,