Training in progress, step 1600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6098, 0.1159],
-#         [0.6098, 1.0000, 0.1612],
-#         [0.1159, 0.1612, 1.0000]])
 ```
 <!--
@@ -1252,6 +1252,10 @@ You can finetune this model on your own dataset.
 | 0.0230 | 1300 | 0.7527        |
 | 0.0239 | 1350 | 0.6124        |
 | 0.0247 | 1400 | 0.6511        |
 ### Framework Versions

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.5831, 0.1091],
+#         [0.5831, 1.0000, 0.1491],
+#         [0.1091, 0.1491, 1.0000]])
 ```
 <!--
 | 0.0230 | 1300 | 0.7527        |
 | 0.0239 | 1350 | 0.6124        |
 | 0.0247 | 1400 | 0.6511        |
+| 0.0256 | 1450 | 0.7047        |
+| 0.0265 | 1500 | 0.6639        |
+| 0.0274 | 1550 | 0.6795        |
+| 0.0283 | 1600 | 0.6341        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50f82f7a3e99ac380e6c75e094e8a65af18eab74684e14810366f1f357e41d60
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:b61a082ce4f7292aa8d2ccb7ce494bab8f2b629411a594d12d441a6b6b4b0923
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:075842c04a113f74618952773066d4169a59d801ccaeebc81d0b3d4f339980ae
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa9774e69968408986a253b355147319deaf91c49e1a0418b0c871e674bc9e7d
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e01322e389b4b8eb7d66bcbb01e77ee7865373b24e1202361d2a597222b56d92
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:c221931df6dfe32bf8ee57ce8b3fe8867a8a29cca913a459c26e86b456713867
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3b03c8c40e45be48b5956b82e1abb2c1ec5641f4fc77e6fdec0942a77964500
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e4420338904222790575638e536e5d59f25756f58070a9dfc3e73602e7fbef5
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f365945c29bbfef6ade3ff30db3a8d39cad690fb183ff7034a93fcc76535d32d
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c5f1576da158c64c9c0e76311baa11fecb51a267280f5c39299d29d917bc401
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02473891608206252,
   "eval_steps": 500,
-  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -204,6 +204,34 @@
       "learning_rate": 1.2358657243816255e-05,
       "loss": 0.6511,
       "step": 1400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.028273046950928592,
   "eval_steps": 500,
+  "global_step": 1600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.2358657243816255e-05,
       "loss": 0.6511,
       "step": 1400
+    },
+    {
+      "epoch": 0.025622448799279038,
+      "grad_norm": 2.770859956741333,
+      "learning_rate": 1.280035335689046e-05,
+      "loss": 0.7047,
+      "step": 1450
+    },
+    {
+      "epoch": 0.026505981516495556,
+      "grad_norm": 3.188656806945801,
+      "learning_rate": 1.3242049469964666e-05,
+      "loss": 0.6639,
+      "step": 1500
+    },
+    {
+      "epoch": 0.027389514233712074,
+      "grad_norm": 2.7158899307250977,
+      "learning_rate": 1.368374558303887e-05,
+      "loss": 0.6795,
+      "step": 1550
+    },
+    {
+      "epoch": 0.028273046950928592,
+      "grad_norm": 2.7986080646514893,
+      "learning_rate": 1.4125441696113076e-05,
+      "loss": 0.6341,
+      "step": 1600
     }
   ],
   "logging_steps": 50,