Training in progress, step 4600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.5709, 0.1243],
-#         [0.5709, 1.0000, 0.1388],
-#         [0.1243, 0.1388, 1.0000]])
 ```
 <!--
@@ -1312,6 +1312,10 @@ You can finetune this model on your own dataset.
 | 0.0760 | 4300 | 0.4779        |
 | 0.0769 | 4350 | 0.4463        |
 | 0.0778 | 4400 | 0.4917        |
 ### Framework Versions

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.6246, 0.1173],
+#         [0.6246, 1.0000, 0.1638],
+#         [0.1173, 0.1638, 1.0000]])
 ```
 <!--
 | 0.0760 | 4300 | 0.4779        |
 | 0.0769 | 4350 | 0.4463        |
 | 0.0778 | 4400 | 0.4917        |
+| 0.0786 | 4450 | 0.5572        |
+| 0.0795 | 4500 | 0.4553        |
+| 0.0804 | 4550 | 0.4598        |
+| 0.0813 | 4600 | 0.5445        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28ab5af774c8b92f73b8b0edbc8679fe71ea4dfe39546414176fa364cb641370
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:8de4e67c75f4b4a7427b2810b985973120199b61a5e131a31184941440675588
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f00d4b90b013eb15ec92f6d8f7effe9c310c62760f7a18e7788ea316eefca59
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a2de7647e7106c94dd0498982cecf5e23bcd6cc4a2d106abd4e0a67ed761131
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb2a47368e57ccb8578c1be62d33027ebea1fe6bd63bffa18038620abaa273d0
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0c125bd90db08e6be99cb0172e0d523e9ccee0b8234c431ef93a30e9252f3b6
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa01c0faab1a39c11ceba075e1e73b81a5689cfd1ac0d27ee7fece150d320be6
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d5f1d0b668240e318afe61a3e255204044d52e6d78b98d08469e3aaa3293711
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02cdb9f646b45dd98fa4d4f96eec0334a5b44f90a00b9690154aab22c0f05613
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c485dc831eee1163015670289f311918797d3da6236e5f584fb1d2dbee1714b9
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07775087911505363,
   "eval_steps": 500,
-  "global_step": 4400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -624,6 +624,34 @@
       "learning_rate": 3.8860424028268556e-05,
       "loss": 0.4917,
       "step": 4400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0812850099839197,
   "eval_steps": 500,
+  "global_step": 4600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.8860424028268556e-05,
       "loss": 0.4917,
       "step": 4400
+    },
+    {
+      "epoch": 0.07863441183227014,
+      "grad_norm": 5.889612197875977,
+      "learning_rate": 3.930212014134276e-05,
+      "loss": 0.5572,
+      "step": 4450
+    },
+    {
+      "epoch": 0.07951794454948667,
+      "grad_norm": 2.7529609203338623,
+      "learning_rate": 3.9743816254416965e-05,
+      "loss": 0.4553,
+      "step": 4500
+    },
+    {
+      "epoch": 0.08040147726670319,
+      "grad_norm": 2.4175944328308105,
+      "learning_rate": 4.018551236749117e-05,
+      "loss": 0.4598,
+      "step": 4550
+    },
+    {
+      "epoch": 0.0812850099839197,
+      "grad_norm": 2.2330217361450195,
+      "learning_rate": 4.0627208480565374e-05,
+      "loss": 0.5445,
+      "step": 4600
     }
   ],
   "logging_steps": 50,