Training in progress, step 10423, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/README.md +11 -3
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -3

last-checkpoint/README.md CHANGED Viewed

@@ -184,9 +184,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[26.0330, 15.2864,  1.5172],
-#         [15.2864, 18.1790,  1.4252],
-#         [ 1.5172,  1.4252, 10.7558]])
 ```
 <!--
@@ -664,6 +664,14 @@ You can finetune this model on your own dataset.
 | 0.9498 | 9900  | 0.0949        | -               | -                        |
 | 0.9546 | 9950  | 0.0821        | -               | -                        |
 | 0.9594 | 10000 | 0.0703        | 0.4128          | 0.1965                   |
 </details>

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[24.4025, 13.9460,  1.4325],
+#         [13.9460, 16.7882,  1.3582],
+#         [ 1.4325,  1.3582, 10.2211]])
 ```
 <!--
 | 0.9498 | 9900  | 0.0949        | -               | -                        |
 | 0.9546 | 9950  | 0.0821        | -               | -                        |
 | 0.9594 | 10000 | 0.0703        | 0.4128          | 0.1965                   |
+| 0.9642 | 10050 | 0.1004        | -               | -                        |
+| 0.9690 | 10100 | 0.0985        | -               | -                        |
+| 0.9738 | 10150 | 0.0906        | -               | -                        |
+| 0.9786 | 10200 | 0.0991        | -               | -                        |
+| 0.9834 | 10250 | 0.0811        | -               | -                        |
+| 0.9882 | 10300 | 0.1002        | -               | -                        |
+| 0.9930 | 10350 | 0.0872        | -               | -                        |
+| 0.9978 | 10400 | 0.1106        | -               | -                        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6e51ef566acf288b0d358a37576bde59dc6b167b6a53ed1857a9b3c7d17aafc
 size 728561776

 version https://git-lfs.github.com/spec/v1
+oid sha256:14ac37cabac3c2a855ab2457343f660268bc81e8d0e56283536af5e9b8ee54aa
 size 728561776

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2a5fe0e654c9c6bf3638901cf2845a4adb158933e39f5eda410cb3cc0bd44c2
 size 1457369077

 version https://git-lfs.github.com/spec/v1
+oid sha256:16845e30cdd6a1aa5713c4ac7c985582bc729ad8423f628eab97f2533acd7c69
 size 1457369077

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57d9d7558c709b6a972ce8ad22ae68adf5da98342c109a0c8ae20210d62a4e5a
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:75eaade8cd62d6b12db3659471111ec66106cdaf49d1adb4716aa9b8893ced8e
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2df9de5796b1324aede75beded346eab735a6ced51772973f92463378a41bb3e
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:523043000b1d832228380e68f0d4b68b296b7aacace12192adec8a11d32d0346
 size 14917

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e355c146c3d8245bc7493652056f88cd1816cc428f732f82ec17ea73202220c
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d8a685c45c8f559dab2fc37e9c9ee050f23f22c48d6988da111853aad98abdb
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 1500,
   "best_metric": 0.27095313455750514,
   "best_model_checkpoint": "models/splade-norbert4-base-retrieval-only/checkpoint-1500",
-  "epoch": 0.9594166746618056,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3028,6 +3028,86 @@
       "eval_samples_per_second": 39.94,
       "eval_steps_per_second": 0.624,
       "step": 10000
     }
   ],
   "logging_steps": 50,
@@ -3042,7 +3122,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": 1500,
   "best_metric": 0.27095313455750514,
   "best_model_checkpoint": "models/splade-norbert4-base-retrieval-only/checkpoint-1500",
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 10423,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.94,
       "eval_steps_per_second": 0.624,
       "step": 10000
+    },
+    {
+      "base_loss": 0.0908,
+      "document_regularizer_loss": 0.0073,
+      "epoch": 0.9642137580351147,
+      "grad_norm": 0.605567991733551,
+      "learning_rate": 7.974413646055437e-07,
+      "loss": 0.1004,
+      "query_regularizer_loss": 0.0022,
+      "step": 10050
+    },
+    {
+      "base_loss": 0.0893,
+      "document_regularizer_loss": 0.007,
+      "epoch": 0.9690108414084236,
+      "grad_norm": 0.786066472530365,
+      "learning_rate": 6.908315565031984e-07,
+      "loss": 0.0985,
+      "query_regularizer_loss": 0.0021,
+      "step": 10100
+    },
+    {
+      "base_loss": 0.0814,
+      "document_regularizer_loss": 0.0072,
+      "epoch": 0.9738079247817327,
+      "grad_norm": 3.1570448875427246,
+      "learning_rate": 5.842217484008529e-07,
+      "loss": 0.0906,
+      "query_regularizer_loss": 0.0021,
+      "step": 10150
+    },
+    {
+      "base_loss": 0.0897,
+      "document_regularizer_loss": 0.0072,
+      "epoch": 0.9786050081550417,
+      "grad_norm": 9.441202163696289,
+      "learning_rate": 4.776119402985075e-07,
+      "loss": 0.0991,
+      "query_regularizer_loss": 0.0022,
+      "step": 10200
+    },
+    {
+      "base_loss": 0.0715,
+      "document_regularizer_loss": 0.0074,
+      "epoch": 0.9834020915283508,
+      "grad_norm": 0.39448684453964233,
+      "learning_rate": 3.710021321961621e-07,
+      "loss": 0.0811,
+      "query_regularizer_loss": 0.0022,
+      "step": 10250
+    },
+    {
+      "base_loss": 0.0909,
+      "document_regularizer_loss": 0.0072,
+      "epoch": 0.9881991749016598,
+      "grad_norm": 8.17419147491455,
+      "learning_rate": 2.6439232409381664e-07,
+      "loss": 0.1002,
+      "query_regularizer_loss": 0.0021,
+      "step": 10300
+    },
+    {
+      "base_loss": 0.0779,
+      "document_regularizer_loss": 0.0072,
+      "epoch": 0.9929962582749688,
+      "grad_norm": 2.363359212875366,
+      "learning_rate": 1.5778251599147122e-07,
+      "loss": 0.0872,
+      "query_regularizer_loss": 0.0021,
+      "step": 10350
+    },
+    {
+      "base_loss": 0.1014,
+      "document_regularizer_loss": 0.0071,
+      "epoch": 0.9977933416482778,
+      "grad_norm": 2.649932861328125,
+      "learning_rate": 5.1172707889125806e-08,
+      "loss": 0.1106,
+      "query_regularizer_loss": 0.002,
+      "step": 10400
     }
   ],
   "logging_steps": 50,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }