Training in progress, step 4000, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/README.md +43 -33
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +152 -4

last-checkpoint/README.md CHANGED Viewed

@@ -349,49 +349,49 @@ model-index:
       type: NanoNFCorpus
     metrics:
     - type: dot_accuracy@1
-      value: 0.28
       name: Dot Accuracy@1
     - type: dot_accuracy@3
-      value: 0.38
       name: Dot Accuracy@3
     - type: dot_accuracy@5
-      value: 0.5
       name: Dot Accuracy@5
     - type: dot_accuracy@10
-      value: 0.6
       name: Dot Accuracy@10
     - type: dot_precision@1
-      value: 0.28
       name: Dot Precision@1
     - type: dot_precision@3
-      value: 0.26666666666666666
       name: Dot Precision@3
     - type: dot_precision@5
-      value: 0.268
       name: Dot Precision@5
     - type: dot_precision@10
-      value: 0.22799999999999998
       name: Dot Precision@10
     - type: dot_recall@1
-      value: 0.020245923941945597
       name: Dot Recall@1
     - type: dot_recall@3
-      value: 0.05035545265818719
       name: Dot Recall@3
     - type: dot_recall@5
-      value: 0.06636231438770367
       name: Dot Recall@5
     - type: dot_recall@10
-      value: 0.10965738342516877
       name: Dot Recall@10
     - type: dot_ndcg@10
-      value: 0.25962417542013744
       name: Dot Ndcg@10
     - type: dot_mrr@10
-      value: 0.3617460317460317
       name: Dot Mrr@10
     - type: dot_map@100
-      value: 0.10860788103772646
       name: Dot Map@100
     - type: query_active_dims
       value: 51200.0
@@ -471,9 +471,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[201.5240,  24.5641,   5.0840],
-#         [ 24.5641,  34.0186,   3.3158],
-#         [  5.0840,   3.3158,  29.2518]])
 ```
 <!--
@@ -511,21 +511,21 @@ You can finetune this model on your own dataset.
 | Metric                | Value      |
 |:----------------------|:-----------|
-| dot_accuracy@1        | 0.28       |
-| dot_accuracy@3        | 0.38       |
-| dot_accuracy@5        | 0.5        |
-| dot_accuracy@10       | 0.6        |
-| dot_precision@1       | 0.28       |
-| dot_precision@3       | 0.2667     |
-| dot_precision@5       | 0.268      |
-| dot_precision@10      | 0.228      |
-| dot_recall@1          | 0.0202     |
-| dot_recall@3          | 0.0504     |
-| dot_recall@5          | 0.0664     |
-| dot_recall@10         | 0.1097     |
-| **dot_ndcg@10**       | **0.2596** |
-| dot_mrr@10            | 0.3617     |
-| dot_map@100           | 0.1086     |
 | query_active_dims     | 51200.0    |
 | query_sparsity_ratio  | 0.0        |
 | corpus_active_dims    | 51200.0    |
@@ -818,6 +818,16 @@ You can finetune this model on your own dataset.
 | 1.0038 | 3400 | 0.2065        | -        | -                        |
 | 1.0186 | 3450 | 0.17          | -        | -                        |
 | 1.0334 | 3500 | 0.179         | 0.7303   | 0.2596                   |
 ### Framework Versions

       type: NanoNFCorpus
     metrics:
     - type: dot_accuracy@1
+      value: 0.32
       name: Dot Accuracy@1
     - type: dot_accuracy@3
+      value: 0.4
       name: Dot Accuracy@3
     - type: dot_accuracy@5
+      value: 0.52
       name: Dot Accuracy@5
     - type: dot_accuracy@10
+      value: 0.62
       name: Dot Accuracy@10
     - type: dot_precision@1
+      value: 0.32
       name: Dot Precision@1
     - type: dot_precision@3
+      value: 0.2866666666666666
       name: Dot Precision@3
     - type: dot_precision@5
+      value: 0.276
       name: Dot Precision@5
     - type: dot_precision@10
+      value: 0.23800000000000002
       name: Dot Precision@10
     - type: dot_recall@1
+      value: 0.020779419687305747
       name: Dot Recall@1
     - type: dot_recall@3
+      value: 0.05318433469893364
       name: Dot Recall@3
     - type: dot_recall@5
+      value: 0.08697326661296835
       name: Dot Recall@5
     - type: dot_recall@10
+      value: 0.11024946650778294
       name: Dot Recall@10
     - type: dot_ndcg@10
+      value: 0.2731246838532083
       name: Dot Ndcg@10
     - type: dot_mrr@10
+      value: 0.3876349206349206
       name: Dot Mrr@10
     - type: dot_map@100
+      value: 0.11211568501359778
       name: Dot Map@100
     - type: query_active_dims
       value: 51200.0
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[148.3932,  17.6451,   3.8017],
+#         [ 17.6451,  23.1705,   2.1552],
+#         [  3.8017,   2.1552,  18.1152]])
 ```
 <!--
 | Metric                | Value      |
 |:----------------------|:-----------|
+| dot_accuracy@1        | 0.32       |
+| dot_accuracy@3        | 0.4        |
+| dot_accuracy@5        | 0.52       |
+| dot_accuracy@10       | 0.62       |
+| dot_precision@1       | 0.32       |
+| dot_precision@3       | 0.2867     |
+| dot_precision@5       | 0.276      |
+| dot_precision@10      | 0.238      |
+| dot_recall@1          | 0.0208     |
+| dot_recall@3          | 0.0532     |
+| dot_recall@5          | 0.087      |
+| dot_recall@10         | 0.1102     |
+| **dot_ndcg@10**       | **0.2731** |
+| dot_mrr@10            | 0.3876     |
+| dot_map@100           | 0.1121     |
 | query_active_dims     | 51200.0    |
 | query_sparsity_ratio  | 0.0        |
 | corpus_active_dims    | 51200.0    |
 | 1.0038 | 3400 | 0.2065        | -        | -                        |
 | 1.0186 | 3450 | 0.17          | -        | -                        |
 | 1.0334 | 3500 | 0.179         | 0.7303   | 0.2596                   |
+| 1.0481 | 3550 | 0.1848        | -        | -                        |
+| 1.0629 | 3600 | 0.1935        | -        | -                        |
+| 1.0776 | 3650 | 0.1795        | -        | -                        |
+| 1.0924 | 3700 | 0.1524        | -        | -                        |
+| 1.1072 | 3750 | 0.1542        | -        | -                        |
+| 1.1219 | 3800 | 0.1845        | -        | -                        |
+| 1.1367 | 3850 | 0.1568        | -        | -                        |
+| 1.1515 | 3900 | 0.1584        | -        | -                        |
+| 1.1662 | 3950 | 0.1484        | -        | -                        |
+| 1.1810 | 4000 | 0.1249        | 0.6119   | 0.2731                   |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b65e0f1ba2ea666d4d83b82d443b2ef255d3451253ac391b703b3cff507ac744
 size 728561776

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bcd4e6a8ab56a0db807ca31054934e51802d2eeaa5a038d39f551cacff14347
 size 728561776

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b816a8fc9da81a604d92d18ab9c0fa78933d78143e853332cde1327f2d45484
 size 1457369077

 version https://git-lfs.github.com/spec/v1
+oid sha256:21da268aff2f83d8014837d41fbcfa1beee4ef6ef72e3199bf45ce5ced8a2c48
 size 1457369077

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:636a367cda4d50c8924d561ea7f35a51e3ec83088ad3e160d2a1c3155e15cfa2
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cd1ea480bf72a4e862c1b766d1db801ba2b3f5895e34f5947476735dcd46078
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:544c6732f714f6e1e189df798d020c312f287dedcac7dc7b9ea594b76375666d
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:80fd94e2538428fa98737736853b585ed501ddd45dd103736afef286e04732c6
 size 14917

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:209eecd533b0372305e2eb37b43597d1a5a586610dd414759fd9d46c438f6af6
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c5cd7e5a8b9d1b2f988217fde63eff5a8de41264597b7828a91c46e81ed2ca5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 1500,
   "best_metric": 0.2777906849504241,
   "best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
-  "epoch": 1.0333628579864187,
   "eval_steps": 500,
-  "global_step": 3500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1044,6 +1044,154 @@
       "eval_eti_samples_per_second": 8.192,
       "eval_eti_steps_per_second": 0.259,
       "step": 3500
     }
   ],
   "logging_steps": 50,
@@ -1058,7 +1206,7 @@
         "early_stopping_threshold": 0.001
       },
       "attributes": {
-        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
@@ -1067,7 +1215,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": 1500,
   "best_metric": 0.2777906849504241,
   "best_model_checkpoint": "models/splade-norbert4-base-eti/checkpoint-1500",
+  "epoch": 1.1809861234130499,
   "eval_steps": 500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_eti_samples_per_second": 8.192,
       "eval_eti_steps_per_second": 0.259,
       "step": 3500
+    },
+    {
+      "base_loss": 0.1651,
+      "document_regularizer_loss": 0.0161,
+      "epoch": 1.0481251845290818,
+      "grad_norm": 1.5739084482192993,
+      "learning_rate": 7.2309711286089245e-06,
+      "loss": 0.1848,
+      "query_regularizer_loss": 0.0036,
+      "step": 3550
+    },
+    {
+      "base_loss": 0.1741,
+      "document_regularizer_loss": 0.015,
+      "epoch": 1.0628875110717448,
+      "grad_norm": 2.5850167274475098,
+      "learning_rate": 7.1762904636920395e-06,
+      "loss": 0.1935,
+      "query_regularizer_loss": 0.0044,
+      "step": 3600
+    },
+    {
+      "base_loss": 0.1598,
+      "document_regularizer_loss": 0.0158,
+      "epoch": 1.0776498376144081,
+      "grad_norm": 4.595969200134277,
+      "learning_rate": 7.1216097987751545e-06,
+      "loss": 0.1795,
+      "query_regularizer_loss": 0.0039,
+      "step": 3650
+    },
+    {
+      "base_loss": 0.1322,
+      "document_regularizer_loss": 0.0165,
+      "epoch": 1.0924121641570712,
+      "grad_norm": 2.4553780555725098,
+      "learning_rate": 7.066929133858268e-06,
+      "loss": 0.1524,
+      "query_regularizer_loss": 0.0038,
+      "step": 3700
+    },
+    {
+      "base_loss": 0.1353,
+      "document_regularizer_loss": 0.0149,
+      "epoch": 1.1071744906997343,
+      "grad_norm": 3.1925532817840576,
+      "learning_rate": 7.012248468941383e-06,
+      "loss": 0.1542,
+      "query_regularizer_loss": 0.0039,
+      "step": 3750
+    },
+    {
+      "base_loss": 0.1653,
+      "document_regularizer_loss": 0.0158,
+      "epoch": 1.1219368172423974,
+      "grad_norm": 7.258732795715332,
+      "learning_rate": 6.957567804024498e-06,
+      "loss": 0.1845,
+      "query_regularizer_loss": 0.0035,
+      "step": 3800
+    },
+    {
+      "base_loss": 0.1357,
+      "document_regularizer_loss": 0.0172,
+      "epoch": 1.1366991437850604,
+      "grad_norm": 2.0559401512145996,
+      "learning_rate": 6.902887139107613e-06,
+      "loss": 0.1568,
+      "query_regularizer_loss": 0.0039,
+      "step": 3850
+    },
+    {
+      "base_loss": 0.1363,
+      "document_regularizer_loss": 0.0177,
+      "epoch": 1.1514614703277237,
+      "grad_norm": 11.927323341369629,
+      "learning_rate": 6.848206474190728e-06,
+      "loss": 0.1584,
+      "query_regularizer_loss": 0.0045,
+      "step": 3900
+    },
+    {
+      "base_loss": 0.1271,
+      "document_regularizer_loss": 0.0171,
+      "epoch": 1.1662237968703868,
+      "grad_norm": 4.851423263549805,
+      "learning_rate": 6.793525809273841e-06,
+      "loss": 0.1484,
+      "query_regularizer_loss": 0.0041,
+      "step": 3950
+    },
+    {
+      "base_loss": 0.105,
+      "document_regularizer_loss": 0.0164,
+      "epoch": 1.1809861234130499,
+      "grad_norm": 4.975285530090332,
+      "learning_rate": 6.738845144356956e-06,
+      "loss": 0.1249,
+      "query_regularizer_loss": 0.0035,
+      "step": 4000
+    },
+    {
+      "epoch": 1.1809861234130499,
+      "eval_NanoBEIR_mean_avg_flops": 51200.0,
+      "eval_NanoBEIR_mean_corpus_active_dims": 51200.0,
+      "eval_NanoBEIR_mean_corpus_sparsity_ratio": 0.0,
+      "eval_NanoBEIR_mean_dot_accuracy@1": 0.32,
+      "eval_NanoBEIR_mean_dot_accuracy@10": 0.62,
+      "eval_NanoBEIR_mean_dot_accuracy@3": 0.4,
+      "eval_NanoBEIR_mean_dot_accuracy@5": 0.52,
+      "eval_NanoBEIR_mean_dot_map@100": 0.11211568501359778,
+      "eval_NanoBEIR_mean_dot_mrr@10": 0.3876349206349206,
+      "eval_NanoBEIR_mean_dot_ndcg@10": 0.2731246838532083,
+      "eval_NanoBEIR_mean_dot_precision@1": 0.32,
+      "eval_NanoBEIR_mean_dot_precision@10": 0.23800000000000002,
+      "eval_NanoBEIR_mean_dot_precision@3": 0.2866666666666666,
+      "eval_NanoBEIR_mean_dot_precision@5": 0.276,
+      "eval_NanoBEIR_mean_dot_recall@1": 0.020779419687305747,
+      "eval_NanoBEIR_mean_dot_recall@10": 0.11024946650778294,
+      "eval_NanoBEIR_mean_dot_recall@3": 0.05318433469893364,
+      "eval_NanoBEIR_mean_dot_recall@5": 0.08697326661296835,
+      "eval_NanoBEIR_mean_query_active_dims": 51200.0,
+      "eval_NanoBEIR_mean_query_sparsity_ratio": 0.0,
+      "eval_NanoNFCorpus_avg_flops": 51200.0,
+      "eval_NanoNFCorpus_corpus_active_dims": 51200.0,
+      "eval_NanoNFCorpus_corpus_sparsity_ratio": 0.0,
+      "eval_NanoNFCorpus_dot_accuracy@1": 0.32,
+      "eval_NanoNFCorpus_dot_accuracy@10": 0.62,
+      "eval_NanoNFCorpus_dot_accuracy@3": 0.4,
+      "eval_NanoNFCorpus_dot_accuracy@5": 0.52,
+      "eval_NanoNFCorpus_dot_map@100": 0.11211568501359778,
+      "eval_NanoNFCorpus_dot_mrr@10": 0.3876349206349206,
+      "eval_NanoNFCorpus_dot_ndcg@10": 0.2731246838532083,
+      "eval_NanoNFCorpus_dot_precision@1": 0.32,
+      "eval_NanoNFCorpus_dot_precision@10": 0.23800000000000002,
+      "eval_NanoNFCorpus_dot_precision@3": 0.2866666666666666,
+      "eval_NanoNFCorpus_dot_precision@5": 0.276,
+      "eval_NanoNFCorpus_dot_recall@1": 0.020779419687305747,
+      "eval_NanoNFCorpus_dot_recall@10": 0.11024946650778294,
+      "eval_NanoNFCorpus_dot_recall@3": 0.05318433469893364,
+      "eval_NanoNFCorpus_dot_recall@5": 0.08697326661296835,
+      "eval_NanoNFCorpus_query_active_dims": 51200.0,
+      "eval_NanoNFCorpus_query_sparsity_ratio": 0.0,
+      "eval_eti_loss": 0.6119212508201599,
+      "eval_eti_runtime": 88.0723,
+      "eval_eti_samples_per_second": 8.629,
+      "eval_eti_steps_per_second": 0.273,
+      "step": 4000
     }
   ],
   "logging_steps": 50,
         "early_stopping_threshold": 0.001
       },
       "attributes": {
+        "early_stopping_patience_counter": 5
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }