Training in progress, step 2000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/README.md +38 -33
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +62 -4

last-checkpoint/README.md CHANGED Viewed

@@ -406,49 +406,49 @@ model-index:
       type: group_b_retrieval
     metrics:
     - type: cosine_accuracy@1
-      value: 0.8017153924190722
       name: Cosine Accuracy@1
     - type: cosine_accuracy@3
-      value: 0.877524670294199
       name: Cosine Accuracy@3
     - type: cosine_accuracy@5
-      value: 0.8977220326477912
       name: Cosine Accuracy@5
     - type: cosine_accuracy@10
-      value: 0.9156137600295121
       name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.8017153924190722
       name: Cosine Precision@1
     - type: cosine_precision@3
-      value: 0.2925082234313997
       name: Cosine Precision@3
     - type: cosine_precision@5
-      value: 0.17954440652955822
       name: Cosine Precision@5
     - type: cosine_precision@10
-      value: 0.09156137600295121
       name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.8017153924190722
       name: Cosine Recall@1
     - type: cosine_recall@3
-      value: 0.877524670294199
       name: Cosine Recall@3
     - type: cosine_recall@5
-      value: 0.8977220326477912
       name: Cosine Recall@5
     - type: cosine_recall@10
-      value: 0.9156137600295121
       name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.8612148814875222
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.8434970480552897
       name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.8456366871531872
       name: Cosine Map@100
 ---
@@ -516,7 +516,7 @@ print(query_embeddings.shape, document_embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
-# tensor([[ 0.7469, -0.0002,  0.0699]])
 ```
 <!--
@@ -552,23 +552,23 @@ You can finetune this model on your own dataset.
 * Dataset: `group_b_retrieval`
 * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
-| Metric              | Value      |
-|:--------------------|:-----------|
-| cosine_accuracy@1   | 0.8017     |
-| cosine_accuracy@3   | 0.8775     |
-| cosine_accuracy@5   | 0.8977     |
-| cosine_accuracy@10  | 0.9156     |
-| cosine_precision@1  | 0.8017     |
-| cosine_precision@3  | 0.2925     |
-| cosine_precision@5  | 0.1795     |
-| cosine_precision@10 | 0.0916     |
-| cosine_recall@1     | 0.8017     |
-| cosine_recall@3     | 0.8775     |
-| cosine_recall@5     | 0.8977     |
-| cosine_recall@10    | 0.9156     |
-| **cosine_ndcg@10**  | **0.8612** |
-| cosine_mrr@10       | 0.8435     |
-| cosine_map@100      | 0.8456     |
 <!--
 ## Bias, Risks and Limitations
@@ -798,6 +798,11 @@ You can finetune this model on your own dataset.
 | 0.4035 | 1300 | 0.1267        | -               | -                                |
 | 0.4345 | 1400 | 0.1089        | -               | -                                |
 | 0.4655 | 1500 | 0.1069        | 0.1850          | 0.8612                           |
 ### Framework Versions

       type: group_b_retrieval
     metrics:
     - type: cosine_accuracy@1
+      value: 0.7896338651664668
       name: Cosine Accuracy@1
     - type: cosine_accuracy@3
+      value: 0.8709766669740847
       name: Cosine Accuracy@3
     - type: cosine_accuracy@5
+      value: 0.8913584801254265
       name: Cosine Accuracy@5
     - type: cosine_accuracy@10
+      value: 0.9114636170801439
       name: Cosine Accuracy@10
     - type: cosine_precision@1
+      value: 0.7896338651664668
       name: Cosine Precision@1
     - type: cosine_precision@3
+      value: 0.2903255556580282
       name: Cosine Precision@3
     - type: cosine_precision@5
+      value: 0.17827169602508527
       name: Cosine Precision@5
     - type: cosine_precision@10
+      value: 0.09114636170801438
       name: Cosine Precision@10
     - type: cosine_recall@1
+      value: 0.7896338651664668
       name: Cosine Recall@1
     - type: cosine_recall@3
+      value: 0.8709766669740847
       name: Cosine Recall@3
     - type: cosine_recall@5
+      value: 0.8913584801254265
       name: Cosine Recall@5
     - type: cosine_recall@10
+      value: 0.9114636170801439
       name: Cosine Recall@10
     - type: cosine_ndcg@10
+      value: 0.85301140683731
       name: Cosine Ndcg@10
     - type: cosine_mrr@10
+      value: 0.8340159110771488
       name: Cosine Mrr@10
     - type: cosine_map@100
+      value: 0.8361969274027575
       name: Cosine Map@100
 ---
 # Get the similarity scores for the embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
+# tensor([[ 0.8043, -0.0173,  0.0371]])
 ```
 <!--
 * Dataset: `group_b_retrieval`
 * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+| Metric              | Value     |
+|:--------------------|:----------|
+| cosine_accuracy@1   | 0.7896    |
+| cosine_accuracy@3   | 0.871     |
+| cosine_accuracy@5   | 0.8914    |
+| cosine_accuracy@10  | 0.9115    |
+| cosine_precision@1  | 0.7896    |
+| cosine_precision@3  | 0.2903    |
+| cosine_precision@5  | 0.1783    |
+| cosine_precision@10 | 0.0911    |
+| cosine_recall@1     | 0.7896    |
+| cosine_recall@3     | 0.871     |
+| cosine_recall@5     | 0.8914    |
+| cosine_recall@10    | 0.9115    |
+| **cosine_ndcg@10**  | **0.853** |
+| cosine_mrr@10       | 0.834     |
+| cosine_map@100      | 0.8362    |
 <!--
 ## Bias, Risks and Limitations
 | 0.4035 | 1300 | 0.1267        | -               | -                                |
 | 0.4345 | 1400 | 0.1089        | -               | -                                |
 | 0.4655 | 1500 | 0.1069        | 0.1850          | 0.8612                           |
+| 0.4966 | 1600 | 0.1144        | -               | -                                |
+| 0.5276 | 1700 | 0.1059        | -               | -                                |
+| 0.5587 | 1800 | 0.0966        | -               | -                                |
+| 0.5897 | 1900 | 0.1191        | -               | -                                |
+| 0.6207 | 2000 | 0.0964        | 0.1964          | 0.8530                           |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38b461d393342010280e75a27699c7e1fe222c1f9d612de3c659d7a9dab63596
 size 595640976

 version https://git-lfs.github.com/spec/v1
+oid sha256:76978de5f2e075e8aa79b5afc05c75d738cbe21cabbfdaed1a48a4269a4866f3
 size 595640976

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7dd30a9d85da2724d630510ec0808cdee92ad416551bfd14e6bcc1e2220c13c
 size 1191508006

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a8115e8405542064ec7230c4c48017e022fb3b37ea793f0cf2c45a84b1bcfed
 size 1191508006

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02cd2bf499625e845bf2c22f35fde877cabef4a1d7cd9152e02bddf16ac17428
 size 14391

 version https://git-lfs.github.com/spec/v1
+oid sha256:b62e751c4256ced4de30a5b4b7f0c7deba173870a9142388b36fe30d1438a332
 size 14391

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b249816ee3fb553b903ac6355be3c046e74b47b6d09e070158ac72d91b74e658
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f446ffc28f743e9353c6eb0384dda3f80dababeb4062e70b3a2f57d5ddafd96
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 500,
   "best_metric": 0.8750381759774116,
   "best_model_checkpoint": "models/norbert4-v6-stage2-group-b/checkpoint-500",
-  "epoch": 0.4655493482309125,
   "eval_steps": 500,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -182,6 +182,64 @@
       "eval_samples_per_second": 63.731,
       "eval_steps_per_second": 0.999,
       "step": 1500
     }
   ],
   "logging_steps": 100,
@@ -196,7 +254,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -205,7 +263,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": 500,
   "best_metric": 0.8750381759774116,
   "best_model_checkpoint": "models/norbert4-v6-stage2-group-b/checkpoint-500",
+  "epoch": 0.6207324643078833,
   "eval_steps": 500,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 63.731,
       "eval_steps_per_second": 0.999,
       "step": 1500
+    },
+    {
+      "epoch": 0.4965859714463066,
+      "grad_norm": 2.73714542388916,
+      "learning_rate": 1.1869129192350805e-05,
+      "loss": 0.1144,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5276225946617008,
+      "grad_norm": 1.8794718980789185,
+      "learning_rate": 1.0795663964861532e-05,
+      "loss": 0.1059,
+      "step": 1700
+    },
+    {
+      "epoch": 0.5586592178770949,
+      "grad_norm": 1.8753403425216675,
+      "learning_rate": 9.712863874436337e-06,
+      "loss": 0.0966,
+      "step": 1800
+    },
+    {
+      "epoch": 0.5896958410924892,
+      "grad_norm": 0.4708488881587982,
+      "learning_rate": 8.633432513141098e-06,
+      "loss": 0.1191,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6207324643078833,
+      "grad_norm": 0.937002956867218,
+      "learning_rate": 7.570033950547176e-06,
+      "loss": 0.0964,
+      "step": 2000
+    },
+    {
+      "epoch": 0.6207324643078833,
+      "eval_group_b_retrieval_cosine_accuracy@1": 0.7896338651664668,
+      "eval_group_b_retrieval_cosine_accuracy@10": 0.9114636170801439,
+      "eval_group_b_retrieval_cosine_accuracy@3": 0.8709766669740847,
+      "eval_group_b_retrieval_cosine_accuracy@5": 0.8913584801254265,
+      "eval_group_b_retrieval_cosine_map@100": 0.8361969274027575,
+      "eval_group_b_retrieval_cosine_mrr@10": 0.8340159110771488,
+      "eval_group_b_retrieval_cosine_ndcg@10": 0.85301140683731,
+      "eval_group_b_retrieval_cosine_precision@1": 0.7896338651664668,
+      "eval_group_b_retrieval_cosine_precision@10": 0.09114636170801438,
+      "eval_group_b_retrieval_cosine_precision@3": 0.2903255556580282,
+      "eval_group_b_retrieval_cosine_precision@5": 0.17827169602508527,
+      "eval_group_b_retrieval_cosine_recall@1": 0.7896338651664668,
+      "eval_group_b_retrieval_cosine_recall@10": 0.9114636170801439,
+      "eval_group_b_retrieval_cosine_recall@3": 0.8709766669740847,
+      "eval_group_b_retrieval_cosine_recall@5": 0.8913584801254265,
+      "eval_loss": 0.19638657569885254,
+      "eval_runtime": 167.4057,
+      "eval_samples_per_second": 64.771,
+      "eval_steps_per_second": 1.015,
+      "step": 2000
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }